{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. Feature Engineering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings \n",
    "warnings.filterwarnings('ignore')\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy import stats\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import seaborn as sns\n",
    "import missingno as msno\n",
    "import re\n",
    "import pandas_profiling\n",
    "import joblib\n",
    "import time\n",
    "import sys\n",
    "import lightgbm as lgb\n",
    "import xgboost as xgb \n",
    "import catboost as cab\n",
    "import my_functions as my\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.model_selection import train_test_split,StratifiedKFold,GridSearchCV\n",
    "from sklearn.model_selection import KFold,cross_val_score\n",
    "from sklearn.metrics import make_scorer,accuracy_score,confusion_matrix, mean_squared_error,roc_auc_score ,mean_absolute_error # 评估标准\n",
    "sns.set_style(\"ticks\") # dark/white/darkgrid/whitegrid"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.1 Data Pre-Processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5-fold cross-validation\n",
    "def kvalid(train_x,bc_y_train,k=5):\n",
    "    \"\"\"Cross-validate a fixed LightGBM regressor and report the mean fold MAE.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    train_x : pandas.DataFrame\n",
    "        Feature matrix; rows are picked by position for each fold.\n",
    "    bc_y_train : pandas.Series or numpy.ndarray\n",
    "        Target aligned row-for-row with ``train_x``. Presumably the\n",
    "        Box-Cox-transformed price, judging by the name -- confirm at the call site.\n",
    "    k : int, default 5\n",
    "        Number of folds.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        ``{\"MAE\": value}`` where value is the mean over folds of the MAE between\n",
    "        ``bcback(y_true, maxlog)`` and ``bcback(y_pred, maxlog)``, rounded to 2 decimals.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    Reads the notebook globals ``cat_feat``, ``bcback`` and ``maxlog``; they must\n",
    "    already be defined when this function is called.\n",
    "    \"\"\"\n",
    "    # random_state only has an effect together with shuffle=True; recent\n",
    "    # scikit-learn versions raise ValueError when it is set without shuffling.\n",
    "    kf = KFold(n_splits=k, shuffle=True, random_state=42)\n",
    "\n",
    "    def _rows(y, positions):\n",
    "        # Positional selection for pandas objects and plain arrays alike, so a\n",
    "        # Series with a non-default index stays aligned with train_x.iloc.\n",
    "        return y.iloc[positions] if hasattr(y, \"iloc\") else y[positions]\n",
    "\n",
    "    fold_mae = []\n",
    "    for tr_idx, val_idx in kf.split(train_x):\n",
    "        kf_X_tr, kf_y_tr = train_x.iloc[tr_idx], _rows(bc_y_train, tr_idx)      # training fold\n",
    "        kf_X_val, kf_y_val = train_x.iloc[val_idx], _rows(bc_y_train, val_idx)  # validation fold\n",
    "        # verbose=-1 on the estimator silences training logs; the older\n",
    "        # silent= / fit(verbose=) switches are not accepted by LightGBM 4.\n",
    "        clf = lgb.LGBMRegressor(objective='regression', metric='mae', verbose=-1,\n",
    "                                num_leaves=80, learning_rate=0.03, n_estimators=300)\n",
    "        clf.fit(kf_X_tr, kf_y_tr, categorical_feature=cat_feat)\n",
    "        result = clf.predict(kf_X_val)\n",
    "        # Score after mapping targets and predictions back through bcback.\n",
    "        fold_mae.append(mean_absolute_error(bcback(kf_y_val, maxlog), bcback(result, maxlog)))\n",
    "    return {\"MAE\": round(np.mean(fold_mae), 2)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of dataframe is 37200128.00 MB\n",
      "Memory usage after optimization is: 10200232.00 MB\n",
      "Decreased by 72.6%\n",
      "Memory usage of dataframe is 12000128.00 MB\n",
      "Memory usage after optimization is: 3200232.00 MB\n",
      "Decreased by 73.3%\n"
     ]
    }
   ],
   "source": [
    "# Read the space-delimited train and test-B files, passing each through\n",
    "# my.reduce_mem_usage (it prints the before/after memory report shown below).\n",
    "train = my.reduce_mem_usage(\n",
    "    pd.read_csv(\"used_car_train_20200313.csv\", sep=\" \")\n",
    ")\n",
    "test = my.reduce_mem_usage(\n",
    "    pd.read_csv(\"used_car_testB_20200421.csv\", sep=\" \")\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.1.1 Dirty data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack train and test so the cleaning steps apply to both,\n",
    "# and turn the \"-\" placeholder into a proper missing value.\n",
    "data = pd.concat([train, test]).replace(\"-\", np.nan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop offerType and seller. Re-assigning (instead of inplace=True) and\n",
    "# ignoring already-missing columns keeps this cell safe to run more than once.\n",
    "data = data.drop(columns=[\"offerType\", \"seller\"], errors=\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.1.2 Missing value"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 删除缺失率>=0.6的特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>nullrate</th>\n",
       "      <th>nullrate%</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>price</th>\n",
       "      <td>50000</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>25.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>notRepairedDamage</th>\n",
       "      <td>32393</td>\n",
       "      <td>0.161965</td>\n",
       "      <td>16.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fuelType</th>\n",
       "      <td>11604</td>\n",
       "      <td>0.058020</td>\n",
       "      <td>5.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gearbox</th>\n",
       "      <td>7949</td>\n",
       "      <td>0.039745</td>\n",
       "      <td>3.97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bodyType</th>\n",
       "      <td>6010</td>\n",
       "      <td>0.030050</td>\n",
       "      <td>3.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>model</th>\n",
       "      <td>1</td>\n",
       "      <td>0.000005</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       0  nullrate  nullrate%\n",
       "price              50000  0.250000      25.00\n",
       "notRepairedDamage  32393  0.161965      16.20\n",
       "fuelType           11604  0.058020       5.80\n",
       "gearbox             7949  0.039745       3.97\n",
       "bodyType            6010  0.030050       3.00\n",
       "model                  1  0.000005       0.00"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column missing-value summary from the project helper\n",
    "# (missing count and missing rate, as shown in the rendered table).\n",
    "my.Nullrate(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 不处理（树模型能自动处理缺失值）"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 插值补全：\n",
    "    * 包括均值/中位数/众数/建模预测（k近邻/随机森林/lgb等）/多重插补/压缩感知补全/矩阵补全等；\n",
    "    * 根据其它强相关特征对该特征进行数据分箱,然后选择该特征所在分箱的相应特征的均值或中位数,来填充缺失值；\n",
    "    * 高级填充方法可参考fancyimpute库。fancyimpute主要提供以下几种填充方式： KNN、 NuclearNormMinimization、SoftImpute、IterativeImputer、BiScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns reported by my.Nullrate, minus \"price\": price is the prediction\n",
    "# target and must not be imputed.\n",
    "mls = [c for c in my.Nullrate(data).index.tolist() if c != \"price\"]\n",
    "for col in mls:\n",
    "    # Fill with the most frequent value and assign the result back to the frame.\n",
    "    # data[col].fillna(..., inplace=True) is chained in-place assignment, which\n",
    "    # does not update `data` once pandas Copy-on-Write is active.\n",
    "    data[col] = data[col].fillna(data[col].mode()[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.1.3 Outlier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['SaleID', 'name', 'regDate', 'model', 'brand', 'bodyType', 'fuelType',\n",
       "       'gearbox', 'power', 'kilometer', 'notRepairedDamage', 'regionCode',\n",
       "       'creatDate', 'price', 'v_0', 'v_1', 'v_2', 'v_3', 'v_4', 'v_5', 'v_6',\n",
       "       'v_7', 'v_8', 'v_9', 'v_10', 'v_11', 'v_12', 'v_13', 'v_14'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the remaining columns before assigning each one a role below.\n",
    "data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column roles used by the following cells.\n",
    "label = \"price\"                       # prediction target\n",
    "ID_feat = [\"SaleID\"]                  # row identifier\n",
    "date_feat = [\"regDate\", \"creatDate\"]  # date columns\n",
    "cat_feat = [\n",
    "    \"name\", \"model\", \"brand\", \"bodyType\",\n",
    "    \"fuelType\", \"gearbox\", \"notRepairedDamage\", \"regionCode\",\n",
    "]\n",
    "# Every column without one of the roles above counts as numeric (column order kept).\n",
    "num_feat = [\n",
    "    col for col in data.columns\n",
    "    if col not in {label, *ID_feat, *date_feat, *cat_feat}\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>power</th>\n",
       "      <th>kilometer</th>\n",
       "      <th>v_0</th>\n",
       "      <th>v_1</th>\n",
       "      <th>v_2</th>\n",
       "      <th>v_3</th>\n",
       "      <th>v_4</th>\n",
       "      <th>v_5</th>\n",
       "      <th>v_6</th>\n",
       "      <th>v_7</th>\n",
       "      <th>v_8</th>\n",
       "      <th>v_9</th>\n",
       "      <th>v_10</th>\n",
       "      <th>v_11</th>\n",
       "      <th>v_12</th>\n",
       "      <th>v_13</th>\n",
       "      <th>v_14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.0</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "      <td>200000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>119.429150</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>184.885438</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.5</td>\n",
       "      <td>30.453125</td>\n",
       "      <td>-4.296875</td>\n",
       "      <td>-4.468750</td>\n",
       "      <td>-7.273438</td>\n",
       "      <td>-4.363281</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-9.171875</td>\n",
       "      <td>-5.664062</td>\n",
       "      <td>-9.640625</td>\n",
       "      <td>-4.156250</td>\n",
       "      <td>-6.546875</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5%</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>41.000000</td>\n",
       "      <td>-3.314453</td>\n",
       "      <td>-1.781250</td>\n",
       "      <td>-3.173828</td>\n",
       "      <td>-1.693359</td>\n",
       "      <td>0.227051</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.007236</td>\n",
       "      <td>0.013634</td>\n",
       "      <td>0.009583</td>\n",
       "      <td>-5.656250</td>\n",
       "      <td>-3.378906</td>\n",
       "      <td>-3.787109</td>\n",
       "      <td>-1.927734</td>\n",
       "      <td>-2.119141</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>75.000000</td>\n",
       "      <td>12.5</td>\n",
       "      <td>43.125000</td>\n",
       "      <td>-3.193359</td>\n",
       "      <td>-0.969727</td>\n",
       "      <td>-1.460938</td>\n",
       "      <td>-0.921387</td>\n",
       "      <td>0.243530</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.062469</td>\n",
       "      <td>0.035339</td>\n",
       "      <td>0.033905</td>\n",
       "      <td>-3.708984</td>\n",
       "      <td>-1.954102</td>\n",
       "      <td>-1.870117</td>\n",
       "      <td>-1.055664</td>\n",
       "      <td>-0.437988</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>110.000000</td>\n",
       "      <td>15.0</td>\n",
       "      <td>44.593750</td>\n",
       "      <td>-3.052734</td>\n",
       "      <td>-0.383301</td>\n",
       "      <td>0.103363</td>\n",
       "      <td>-0.074219</td>\n",
       "      <td>0.257812</td>\n",
       "      <td>0.000809</td>\n",
       "      <td>0.095886</td>\n",
       "      <td>0.056976</td>\n",
       "      <td>0.058563</td>\n",
       "      <td>1.625977</td>\n",
       "      <td>-0.362305</td>\n",
       "      <td>-0.133423</td>\n",
       "      <td>-0.036270</td>\n",
       "      <td>0.140259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>150.000000</td>\n",
       "      <td>15.0</td>\n",
       "      <td>46.000000</td>\n",
       "      <td>3.996094</td>\n",
       "      <td>0.240997</td>\n",
       "      <td>1.565430</td>\n",
       "      <td>0.869263</td>\n",
       "      <td>0.265381</td>\n",
       "      <td>0.101929</td>\n",
       "      <td>0.125366</td>\n",
       "      <td>0.079407</td>\n",
       "      <td>0.087524</td>\n",
       "      <td>2.844238</td>\n",
       "      <td>1.256836</td>\n",
       "      <td>1.776367</td>\n",
       "      <td>0.942871</td>\n",
       "      <td>0.681641</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95%</th>\n",
       "      <td>232.000000</td>\n",
       "      <td>15.0</td>\n",
       "      <td>47.750000</td>\n",
       "      <td>5.152344</td>\n",
       "      <td>1.227588</td>\n",
       "      <td>3.341797</td>\n",
       "      <td>2.107422</td>\n",
       "      <td>0.277832</td>\n",
       "      <td>0.119568</td>\n",
       "      <td>0.168213</td>\n",
       "      <td>0.108765</td>\n",
       "      <td>0.125000</td>\n",
       "      <td>4.054883</td>\n",
       "      <td>2.841797</td>\n",
       "      <td>4.097656</td>\n",
       "      <td>2.187500</td>\n",
       "      <td>1.364258</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>19312.000000</td>\n",
       "      <td>15.0</td>\n",
       "      <td>52.312500</td>\n",
       "      <td>7.320312</td>\n",
       "      <td>19.031250</td>\n",
       "      <td>9.851562</td>\n",
       "      <td>6.828125</td>\n",
       "      <td>0.291748</td>\n",
       "      <td>0.153442</td>\n",
       "      <td>1.411133</td>\n",
       "      <td>0.160767</td>\n",
       "      <td>0.222778</td>\n",
       "      <td>12.359375</td>\n",
       "      <td>18.812500</td>\n",
       "      <td>13.851562</td>\n",
       "      <td>11.148438</td>\n",
       "      <td>8.656250</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               power  kilometer            v_0            v_1            v_2  \\\n",
       "count  200000.000000   200000.0  200000.000000  200000.000000  200000.000000   \n",
       "mean      119.429150        NaN            NaN      -0.000000       0.000000   \n",
       "std       184.885438        0.0       0.000000       0.000000       0.000000   \n",
       "min         0.000000        0.5      30.453125      -4.296875      -4.468750   \n",
       "5%          0.000000        4.0      41.000000      -3.314453      -1.781250   \n",
       "25%        75.000000       12.5      43.125000      -3.193359      -0.969727   \n",
       "50%       110.000000       15.0      44.593750      -3.052734      -0.383301   \n",
       "75%       150.000000       15.0      46.000000       3.996094       0.240997   \n",
       "95%       232.000000       15.0      47.750000       5.152344       1.227588   \n",
       "max     19312.000000       15.0      52.312500       7.320312      19.031250   \n",
       "\n",
       "                 v_3            v_4            v_5            v_6  \\\n",
       "count  200000.000000  200000.000000  200000.000000  200000.000000   \n",
       "mean        0.000000       0.000000       0.000000       0.000000   \n",
       "std         0.000000       0.000000       0.000000       0.000000   \n",
       "min        -7.273438      -4.363281       0.000000       0.000000   \n",
       "5%         -3.173828      -1.693359       0.227051       0.000000   \n",
       "25%        -1.460938      -0.921387       0.243530       0.000038   \n",
       "50%         0.103363      -0.074219       0.257812       0.000809   \n",
       "75%         1.565430       0.869263       0.265381       0.101929   \n",
       "95%         3.341797       2.107422       0.277832       0.119568   \n",
       "max         9.851562       6.828125       0.291748       0.153442   \n",
       "\n",
       "                 v_7            v_8            v_9           v_10  \\\n",
       "count  200000.000000  200000.000000  200000.000000  200000.000000   \n",
       "mean        0.000000       0.000000       0.000000       0.000000   \n",
       "std         0.000000       0.000000       0.000000       0.000000   \n",
       "min         0.000000       0.000000       0.000000      -9.171875   \n",
       "5%          0.007236       0.013634       0.009583      -5.656250   \n",
       "25%         0.062469       0.035339       0.033905      -3.708984   \n",
       "50%         0.095886       0.056976       0.058563       1.625977   \n",
       "75%         0.125366       0.079407       0.087524       2.844238   \n",
       "95%         0.168213       0.108765       0.125000       4.054883   \n",
       "max         1.411133       0.160767       0.222778      12.359375   \n",
       "\n",
       "                v_11           v_12           v_13           v_14  \n",
       "count  200000.000000  200000.000000  200000.000000  200000.000000  \n",
       "mean        0.000000       0.000000       0.000000      -0.000000  \n",
       "std         0.000000       0.000000       0.000000       0.000000  \n",
       "min        -5.664062      -9.640625      -4.156250      -6.546875  \n",
       "5%         -3.378906      -3.787109      -1.927734      -2.119141  \n",
       "25%        -1.954102      -1.870117      -1.055664      -0.437988  \n",
       "50%        -0.362305      -0.133423      -0.036270       0.140259  \n",
       "75%         1.256836       1.776367       0.942871       0.681641  \n",
       "95%         2.841797       4.097656       2.187500       1.364258  \n",
       "max        18.812500      13.851562      11.148438       8.656250  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summarise in float64. The previously rendered table showed NaN means and 0.0\n",
    "# standard deviations, which looks like overflow while aggregating the\n",
    "# memory-reduced (narrow float) columns; casting first gives usable statistics.\n",
    "data[num_feat].astype(\"float64\").describe(percentiles=[0.05, 0.25, 0.5, 0.75, 0.95])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "power极差很大，最大值达到19312，根据赛题，power最大为600，因此将>600的记录全部化为600"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cap power at 600: larger values are replaced by 600, everything else is untouched.\n",
    "data[\"power\"] = data[\"power\"].clip(upper=600)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# IQR-based outlier filter\n",
    "def IQRFilter(df,colnames,cut=3):\n",
    "    \"\"\"Drop rows whose value in any listed column lies on or beyond the IQR fences.\n",
    "\n",
    "    Columns are handled in the given order, and each column's quartiles are\n",
    "    computed on the rows that survived the previous columns.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame to filter; the caller's object is not modified.\n",
    "    colnames : iterable of str\n",
    "        Numeric columns to screen.\n",
    "    cut : float, default 3\n",
    "        Fence multiplier. A row is kept only if\n",
    "        ``Q1 - cut*IQR < value < Q3 + cut*IQR`` (strict on both sides).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The rows of ``df`` that passed every column's check.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    A column whose IQR is zero is skipped, because its fences would collapse\n",
    "    onto the quartile and the strict comparison would discard every row.\n",
    "    A column containing NaN produces NaN fences and therefore removes all rows.\n",
    "    \"\"\"\n",
    "    for col in colnames:\n",
    "        # float64 keeps the fence arithmetic out of any narrow column dtype.\n",
    "        values = df[col].to_numpy(dtype=\"float64\")\n",
    "        if values.size == 0:\n",
    "            break  # no rows left to screen\n",
    "        try:\n",
    "            q1, q3 = np.percentile(values, (25, 75), method=\"midpoint\")\n",
    "        except TypeError:\n",
    "            # NumPy releases before 1.22 only know the older keyword name.\n",
    "            q1, q3 = np.percentile(values, (25, 75), interpolation=\"midpoint\")\n",
    "        iqr = q3 - q1\n",
    "        if iqr == 0:\n",
    "            continue\n",
    "        lower, upper = q1 - cut * iqr, q3 + cut * iqr\n",
    "        # Keep only rows strictly inside the outer fences.\n",
    "        df = df[(values > lower) & (values < upper)]\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove outliers from the training rows only (SaleID < 200000),\n",
    "# then re-attach the remaining rows unchanged.\n",
    "data = pd.concat([\n",
    "    IQRFilter(data.loc[data[\"SaleID\"] < 200000], num_feat),\n",
    "    data.loc[data[\"SaleID\"] >= 200000],\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.1.4 Label"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用Box-Cox变换族一般可以保证将数据进行成功的正态变化，但在二分变量或较少水平的等级变量的情况下，不能成功进行转换，此时可以考虑使用广义线性模型，例如logistic模型、johson转换等。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label column for the rows with SaleID < 200000.\n",
    "train_y = data.loc[data[\"SaleID\"] < 200000, label]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x25f8565dd08>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgQAAAFeCAYAAAD6/weaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XtglNWB///3XDLJZDIJAUEFDAKaVLRZEqzaIrEsou1Ct4i9QLZov/ZrIbWxRYKxtlrULBct7raxWKkW/aWLJJXabe3X2lItqaB2TYk0aOIKShUtBiKQmZC5Pc/vj2QGAgmTkGcyA3xe/+g858zMyfGSD+dqM03TRERERM5o9mQ3QERERJJPgUBEREQUCERERESBQERERFAgEBERERQIREREBAUCERERQYFAREREUCAQERERFAhEREQEBQIRERFBgUBERERQIBAREREUCERERAQFAhEREUGBQERERFAgEBERERQIREREBAUCERERQYFAREREUCAQERERFAhEREQEBYKE+/BQJ2/v8ye7GSIiIiekQJBg39rQyLy1LyW7GSIiIiekQJBgbf4gH7YHME0z2U0RERHpkwJBggUjBqYJgbCR7KaIiIj0SYEgwYLdQaAzFElyS0RERPqmQJBgwUhXIDisQCAiIilMgSDBoiMEHUEFAhERSV0KBAkWDQSHFQhERCSFOeNVMAyDZcuW0dLSgsvloqqqinHjxsXK6+rq2LBhA06nk7KyMqZPn05bWxsVFRV0dnYyatQoVqxYgdvt7rXu+++/z5133kkkEsE0Te69914mTJjA888/z49//GOcTifXX389X/rSl+js7GTp0qXs378fj8fDqlWrGD58eEI7aLBCEa0hEBGRU4AZx3PPPWdWVlaapmma27ZtMxctWhQr+/DDD83Zs2ebgUDAPHToUOzv77vvPnPjxo2maZrmI488Yq5bt67Purfffrv5hz/8wTRN06yvrzdvueUWMxgMmldffbV54MABMxAImHPnzjU//PBD82c/+5n5ox/9yDRN03zmmWfM++67L17zkyoSMcxxlc+Y4yqfMTe3fJjs5oiIiPQp7pRBQ0MD06ZNA2Dy5Mk0NTXFyrZv305RUREulwuv10teXh7Nzc093lNSUsLWrVv7rFtZWclVV10FQCQSIT09nZ07d5KXl0dOTg4ul4spU6bw6quvHve5L72U2gf+RBcUghYViohIaosbCHw+H1lZWbHXDoeDcDgcK/N6vbEyj8eDz+fr8dzj8dDe3t5n3eHDh5OWlsauXbtYtWoVt9xyy4A+N57q6moKCgooKCiguro6bn0rHX32gKYMREQklcVdQ5CVlYXff+QsfsMwcDqdvZb5/X68Xm/seUZGBn6/n+zs7D7rArz88svcc8893H///UyYMIFgMHjCz40+y87OjvsDlpeXU15eHrdeIoSOHiHQokIREUlhcUcIiouLqa+vB6CxsZH8/PxYWWFhIQ0NDQQCAdrb29m5cyf5+fkUFxezefNmAOrr65kyZUqfdV9++WX+/d//nUcffZSPf/zjAEycOJHdu3dz4MABgsEgr776KkVFRb1+bioLHjVCoG2HIiKSymymeeJD9qO7DN58801M02T58uXU19eTl5fHjBkzqKuro7a2FtM0WbhwIddeey379u2jsrISv99Pbm4uq1evJjMzs9e6//qv/0owGGTkyJEAjB8/nnvvvTe2y8A0Ta6//nr+7d/+jcOHD1NZWUlraytpaWmsXr069r5U9M4+P5/+wZ8AWHptAbdMvyC5DRIREelD3EAgJ+9/97Yz8z+6RlfK//kCllxTkOQWiYiI9E4HEyVQQFMGIiJyioi7qFAG5ultewh3Lybcvf/Iwsgd7x/k6W17uK5oTLKaJiIi0icFAouFIwahSNcsTCB8ZDYmEDJiQUFERCTVaMoggcLGkQAQVBgQEZEUpkCQQJHI
kRGCkAKBiIikMAWCBAobRwJBMKzNHCIikroUCBIoYmiEQERETg0KBAkUViAQEZFThAJBAmlRoYiInCoUCBKox5RBWIFARERSlwJBAvVcQ6BFhSIikroUCBLo2DUEujZCRERSlQJBAkVHCJx2GyY9A4KIiEgqUSBIoHD3NIHb5QAgqHUEIiKSohQIEijSvcvAndYVCLT1UEREUpUCQQJFpwiigUAjBCIikqoUCBIouoYgNmWgEQIREUlRCgQJdOwIgbYeiohIqlIgSKBoIMjQokIREUlxCgQJFDl2DYGmDEREJEUpECSQdhmIiMipQoEggY49h0D3GYiISKpSIEigiGFit0G6s6ubNWUgIiKpSoEggcKGicNuI83RHQjC2mUgIiKpSYEggSLHBAKtIRARkVSlQJBAYcPEabfj0pSBiIikOGe8CoZhsGzZMlpaWnC5XFRVVTFu3LhYeV1dHRs2bMDpdFJWVsb06dNpa2ujoqKCzs5ORo0axYoVK3C73b3WjXr88cfZt28fFRUVtLa2ctttt8XK3njjDZYsWcK8efMoKSnh/PPPB2Dy5MksWbLEwu6wVsQwcNptpDlsgM4hEBGR1BU3EGzatIlgMEhtbS2NjY2sXLmShx9+GIDW1lZqamrYuHEjgUCA0tJSpk6dypo1a5g9ezZz585l7dq11NbWMmvWrF7rGobB9773PbZv384111wDwMiRI6mpqQFg27Zt/Md//Adf+tKX+Pvf/87FF1/MT37ykwR2iXXChonLYcelKQMREUlxcacMGhoamDZtGtD1J/KmpqZY2fbt2ykqKsLlcuH1esnLy6O5ubnHe0pKSti6dWufdQOBAHPmzGHRokXHfbdpmtx3330sW7YMh8PBjh072Lt3LwsWLODmm29m165dVvVDQoQjXWsIooFAUwYiIpKq4gYCn89HVlZW7LXD4SAcDsfKvF5vrMzj8eDz+Xo893g8tLe391k3JyeHK6+8stfvfv7557nwwguZMGEC0DVy8PWvf52amhoWLlzI0qVL4/6A1dXVFBQUUFBQQHV1ddz6VoqYJk6HjbToGgJNGYiISIqKO2WQlZWF3++PvTYMA6fT2WuZ3+/H6/XGnmdkZOD3+8nOzu6z7on8+te/5oYbboi9vuSSS3A4ug75ufTSS9m7dy+maWKz2fr8jPLycsrLy+P9mAkRiZg4bDac9q726XIjERFJVXFHCIqLi6mvrwegsbGR/Pz8WFlhYSENDQ0EAgHa29vZuXMn+fn5FBcXs3nzZgDq6+uZMmVKn3VPZMeOHRQXF8deP/TQQzzxxBMANDc3M3r06BOGgWQyTLN7hMCOzda1sFBrCEREJFXFHSGYOXMmW7ZsYd68eZimyfLly1m3bh15eXnMmDGDBQsWUFpaimmaLF68mPT0dMrKyqisrKSuro7c3FxWr15NZmZmr3X70tbWhsfj6fEL/+tf/zpLly5l8+bNOBwOVqxYYU0vJIDRfbFRdHTA5bBrykBERFKWzTRNjWNb6BevvksoYtIZinDvM6/zsXO83PDJ87n/uWYAGu++JsktFBEROZ4OJkqQcPcIgaN7hCDNYdflRiIikrIUCBIk0tuUgdYQiIhIilIgSJBIbISgq4vTHHZCERPN0IiISCpSIEiQcPdoQGyEwNn1186QRglERCT1KBAkSGwNgePIGgKAw6FI0tokIiLSFwWCBImtIbAdWUMACgQiIpKaFAgSpM8RgmA4aW0SERHpiwJBghy7yyB6BfLhoNYQiIhI6lEgSJCIEV1U2NXFLqemDEREJHUpECRIbwcTgQKBiIikJgWCBOkzEGgNgYiIpCAFggTp7aRC0AiBiIikJgWCBIlEjhkhiK4h0KJCERFJQQoECRKOLirsHhlwRXcZaIRARERSkAJBghy/7VBrCEREJHUpECTIsYsKte1QRERSmQJBgvS9y0BrCEREJPUoECRIn1MGIU0ZiIhI6lEgSJDIcSME0aOLNWUgIiKpR4Eg
QcI6ulhERE4hCgQJEo70NWWgNQQiIpJ6FAgSJHLM9cdOuw0b2nYoIiKpSYEgQY7dZWCz2XA57ZoyEBGRlKRAkCDH7jKArmkDLSoUEZFUpECQIEcCwZEuTnPY6NQaAhERSUEKBAkS3WXgOGqEwOW006E1BCIikoIUCBIkbJjYgKPyAC6H1hCIiEhqcsarYBgGy5Yto6WlBZfLRVVVFePGjYuV19XVsWHDBpxOJ2VlZUyfPp22tjYqKiro7Oxk1KhRrFixArfb3WvdqMcff5x9+/ZRUVEBwLp163jqqacYPnw4APfccw+jR49m6dKl7N+/H4/Hw6pVq2LlqSZimDjsNmy2nmsIOkMGhmFiPzopiIiIJFncEYJNmzYRDAapra1lyZIlrFy5MlbW2tpKTU0NGzZs4LHHHuPBBx8kGAyyZs0aZs+ezfr165k0aRK1tbV91u3s7KSiooL169f3+N4dO3awatUqampqqKmpYcKECTz55JPk5+ezfv165syZw5o1a6zvEYtEA8HRoocTdYY1SiAiIqklbiBoaGhg2rRpAEyePJmmpqZY2fbt2ykqKsLlcuH1esnLy6O5ubnHe0pKSti6dWufdQOBAHPmzGHRokU9vnfHjh2sXbuW+fPn88gjjxzXlpKSEl566SVreiEBwhGzxw4D6JoyAB1fLCIiqSduIPD5fGRlZcVeOxwOwuFwrMzr9cbKPB4PPp+vx3OPx0N7e3ufdXNycrjyyiuP+95Zs2axbNkynnjiCRoaGnjhhRd6/dx4qqurKSgooKCggOrq6rj1rRI2DJyOnt2bpuOLRUQkRcVdQ5CVlYXf74+9NgwDp9PZa5nf78fr9caeZ2Rk4Pf7yc7O7rNub0zT5MYbb4yVX3XVVbz++us9PiP6ufGUl5dTXl4et57VIoZ5XCBI7w4E7Z3aaSAiIqkl7ghBcXEx9fX1ADQ2NpKfnx8rKywspKGhgUAgQHt7Ozt37iQ/P5/i4mI2b94MQH19PVOmTOmzbm98Ph+zZ8/G7/djmiavvPIKl1xySa+fm6rCvawhyHQ5ADh4OJSMJomIiPQp7gjBzJkz2bJlC/PmzcM0TZYvX866devIy8tjxowZLFiwgNLSUkzTZPHixaSnp1NWVkZlZSV1dXXk5uayevVqMjMze63bG6/Xy+LFi7nhhhtwuVx88pOf5KqrruKyyy6jsrKS+fPnk5aWxurVqy3vEKtEjOPXEEQDwYEOBQIREUktNtM0zWQ34nTyi1ffJRQxufu/mzg3J4OyT18QK9v+3gE2/M+7rJz7ceZdlpfEVoqIiPSkg4kSwDTNE04ZHNCUgYiIpBgFggSImMffYwDg1pSBiIikKAWCBIgcc/Vx1JFFhcEhb5OIiMiJKBAkQCTSVyDoWsOpEQIREUk1CgQJEI5efezoGQjcaZoyEBGR1KRAkACxQHDMCIHDbsOb7uSjDk0ZiIhIalEgSIC+1hAADPOk6WAiERFJOQoECRA2DAAc9uO7d5jbpSkDERFJOQoECRDpY8oAYFhmGodDETp1wZGIiKQQBYIEONGUQY47DYBDmjYQEZEUokCQAH0tKoSuEQLQaYUiIpJaFAgSIBw5QSBwuwD4yK+dBiIikjoUCBIgNmXg6GVRoUYIREQkBSkQJMCRXQa9TRl0jRAc1E4DERFJIQoECXDCXQbu6AiBpgxERCR1KBAkQL8WFWqEQEREUogCQQKc8KRCrSEQEZEUpECQACcaIcjp3mVwQPcZiIhIClEgSIBI5ARHF2vKQEREUpACQQKEzb6nDNIcdrLSnQoEIiKSUhQIEiBygoOJoOv4Yt14KCIiqUSBIAFiawgcvQeCYZlpWkMgIiIpRYEgAU60ywC6AoE/GCEYNoayWSIiIn1SIEiAcLxAEN1poMOJREQkRSgQJECk++hiZy+7DAByunca6PhiERFJFQoECXCicwgAcnU4kYiIpBhnvAqGYbBs2TJaWlpwuVxUVVUxbty4WHldXR0bNmzA
6XRSVlbG9OnTaWtro6Kigs7OTkaNGsWKFStwu9291o16/PHH2bdvHxUVFQA888wzPPHEEzgcDvLz81m2bBl2u505c+bg9XoBGDt2LCtWrLC6TwYt7hqC2OFECgQiIpIa4gaCTZs2EQwGqa2tpbGxkZUrV/Lwww8D0NraSk1NDRs3biQQCFBaWsrUqVNZs2YNs2fPZu7cuaxdu5ba2lpmzZrVa13DMPje977H9u3bueaaawDo7OzkP//zP/nNb36D2+3mtttu44UXXuDKK68EoKamJoFdMnjheNsOY4cTaQ2BiIikhrhTBg0NDUybNg2AyZMn09TUFCvbvn07RUVFuFwuvF4veXl5NDc393hPSUkJW7du7bNuIBBgzpw5LFq0KPa5LpeLDRs24Ha7AQiHw6Snp9Pc3Mzhw4e56aabuOGGG2hsbLS0M6wSGyHoa9uhW6cViohIaokbCHw+H1lZWbHXDoeDcDgcK4sO3wN4PB58Pl+P5x6Ph/b29j7r5uTkxP7kH2uU3c5ZZ50FdI0GdHR0MHXqVDIyMvja177GY489xj333ENFRUWsLX2prq6moKCAgoICqqur4/24lojtMrD1te1QuwxERCS1xJ0yyMrKwu/3x14bhoHT6ey1zO/34/V6Y88zMjLw+/1kZ2f3WbcvhmHwwAMP8Pbbb1NdXY3NZmP8+PGMGzcu9vfDhg2jtbWVc889t8/PKS8vp7y8PN6PaamIYeCw27D1GQg0QiAiIqkl7ghBcXEx9fX1ADQ2NpKfnx8rKywspKGhgUAgQHt7Ozt37iQ/P5/i4mI2b94MQH19PVOmTOmzbl/uvvtuAoEAa9asiU0dPPXUU6xcuRKAvXv34vP5GDly5Mn/9AkSNsw+1w+ArkAWEZHUE3eEYObMmWzZsoV58+ZhmibLly9n3bp15OXlMWPGDBYsWEBpaSmmabJ48WLS09MpKyujsrKSuro6cnNzWb16NZmZmb3W7c2OHTt46qmnuPTSS7nxxhsBuOGGG/jCF77Ad77zHebPn4/NZmP58uWx0YpUEjHMPncYQNddBqBzCEREJHXYTLP7aj6xxC9efZcVzzYTjhjc8dmLepSlOWx88dLzAJh09++YMNLDM+XTktFMERGRHnQwUQLEGyGArp0GH/k1QiAiIqlBgSABwoaJo49ji6NyMl26AllERFKGAkECRAzjhIsKoWuEwBcIE4roxkMREUk+BYIECEdMnH0cShQV3WmgUQIREUkFCgQJEDHMPg8lioodTqSdBiIikgIUCCxmmCYmfV9sFHVkhECnFYqISPIpEFgs3k2HUdH7DLTTQEREUoECgcX6Gwhyu6cM2nTjoYiIpAAFAotFA4E9zhqC4Z7uQOBXIBARkeRTILBYxOzfCMGILAUCERFJHQoEFjP6OWUwwtN1j8M+XyDhbRIREYlHgcBi/Z0y0AiBiIikEgUCi/V3UWGmy0G6085+nwKBiIgknwKBxY6sIThxPZvNxllZ6RohEBGRlKBAYLHYCEGcKQPo2mmwzxdAN1CLiEiyKRBYzOi+q8geZ8oAutYRBMIGHcFIglslIiJyYgoEFgv3cw0BHDmLQOsIREQk2RQILGaY/Z8yGBENBH5tPRQRkeRSILBYf3cZAIzI6jqLQAsLRUQk2ZzJbsDp5kTnEDgdNp7etodwpGuhwa5WHwC/a/oHbf4gToed64rGDF1jRUREuikQWCze0cXhiEEo0lUnI80BwKHOcPczY0jaKCIicixNGVhsIFMGHldXHvMHwgltk4iISDwKBBYbyDkEWekKBCIikhoUCCwW3WXQn3MIPN2BwKdAICIiSaZAYLEjUwbx67qcdtIcNvxBBQIREUkuBQKLHQkE/etaT7oTf0AnFYqISHLF/a1lGAZ33303X/7yl1mwYAG7d+/uUV5XV8fcuXP50pe+xAsvvABAW1sbN910E6WlpXz729/m8OHDfdaNevzxx/nBD34Qe/38889z/fXX8+Uvf5m6ujoA
Ojs7KS8vp7S0lJtvvpm2trbB/fQJcGQNQf/qZ6U78QfCus9ARESSKm4g2LRpE8FgkNraWpYsWcLKlStjZa2trdTU1LBhwwYee+wxHnzwQYLBIGvWrGH27NmsX7+eSZMmUVtb22fdzs5OKioqWL9+fexzQ6EQK1as4Gc/+xk1NTWx9z/55JPk5+ezfv165syZw5o1axLTK4MQGcAaAujaaRA2TAJhbTkUEZHkiRsIGhoamDZtGgCTJ0+mqakpVrZ9+3aKiopwuVx4vV7y8vJobm7u8Z6SkhK2bt3aZ91AIMCcOXNYtGhR7HN37txJXl4eOTk5uFwupkyZwquvvnrc57700kuWdoYVjAHsMoAjCwu100BERJIpbiDw+XxkZWXFXjscDsLhcKzM6/XGyjweDz6fr8dzj8dDe3t7n3VzcnK48sorj/vO/n5uqolebtTvEYL0rsOJ/LrxUEREkihuIMjKysLv98deG4aB0+nstczv9+P1ens89/v9ZGdn91m3P995os+Np7q6moKCAgoKCqiuro5bf7CiIwTOAUwZgEYIREQkueIGguLiYurr6wFobGwkPz8/VlZYWEhDQwOBQID29nZ27txJfn4+xcXFbN68GYD6+nqmTJnSZ93eTJw4kd27d3PgwAGCwSCvvvoqRUVFvX5uPOXl5bS0tNDS0kJ5eXn8Hhmk2BqCfk4Z6HAiERFJBXHvMpg5cyZbtmxh3rx5mKbJ8uXLWbduHXl5ecyYMYMFCxZQWlqKaZosXryY9PR0ysrKqKyspK6ujtzcXFavXk1mZmavdXuTlpbGHXfcwde+9jVM0+T666/n7LPPZv78+VRWVjJ//nzS0tJYvXq15R0yWN33FvXr6GI4MmWgw4lERCSZbKb2u1lq/k9f5qWd+7l1xoWck53Ro8ztshOOmLHLjQDe+6iDNX/aydSJI5hTNIYvXnreUDdZREREBxNZLXoOgXOguwy0qFBERJJIgcBikYHuMtCiQhERSQEKBBYzBnD9MXTdZ+By2LWGQEREkkqBwGJHdhn0/z2edIdGCEREJKkUCCwWW0PQz8uN4MgFR1rfKSIiyaJAYLEjawj6/56sdCcR06RT9xmIiEiSKBBYLDLAuwxACwtFRCT5FAgsZgzwtkM46nCiTgUCERFJDgUCi0UMExv9P7oYjpxF4AsqEIiISHIoEFgsYpj93nIYFTucSCMEIiKSJAoEFouYAw8E0QuOfAGdVigiIsmhQGCxiGEOaLoAjgSCQ52hRDRJREQkLgUCixknMWWQm+kC4CN/MBFNEhERiUuBwGLhkwgEbpeDjDQ7bR0KBCIikhwKBBYzzP7fY3C03EwXbf6gTisUEZGkUCCw2MmsIYCuQBCKmOzXtIGIiCSBAoHFurYdDvx9uZlpALzb1mFxi0REROJTILBYxDAHdGxxVK6na2Hhex8dtrpJIiIicSkQWOxkziGAIzsN3v1IIwQiIjL0FAgsZhjmgO4xiIqOELzbphECEREZegoEFooYJiYDu+kwKrqG4D2NEIiISBIoEFgoFDGAk9t2mO504HE5tIZARESSQoHAQoMJBADDPS72fHQYw9BZBCIiMrQUCCwUjnT9Ij+ZcwigKxAEIwYftgesbJaIiEhcCgQWChmDGyHQTgMREUkWBQILRUcIBjNlADqcSEREhp4CgYWiawgGM2UAOpxIRESGnjNeBcMwWLZsGS0tLbhcLqqqqhg3blysvK6ujg0bNuB0OikrK2P69Om0tbVRUVFBZ2cno0aNYsWKFbjd7n7Xfeedd1i+fHnsOxobG/nxj39MYWEh1157Lfn5+QBcffXV3HjjjQnolpMTGuQIQa5HxxeLiEhyxA0EmzZtIhgMUltbS2NjIytXruThhx8GoLW1lZqaGjZu3EggEKC0tJSpU6eyZs0aZs+ezdy5c1m7di21tbXMmjWr33W/+tWvUlNTA8Czzz7LqFGjKCkpYevWrcyePZu77rorsb1yksKxNQQn936t
IRARkWSJ+6uroaGBadOmATB58mSamppiZdu3b6eoqAiXy4XX6yUvL4/m5uYe74n+Ih9I3aiOjg6qq6v57ne/C0BTUxM7duzgK1/5CrfeeisffvihdT1hgdgagpOcMkhz2Dk7O11TBiIiMuTiBgKfz0dWVlbstcPhIBwOx8q8Xm+szOPx4PP5ejz3eDy0t7cPqG7UU089xWc+8xmGDx8OwIQJE7j11lv5+c9/ztVXX01VVVXcH7C6upqCggIKCgqorq6OW38wYmsITnLKAGBsbiYfHOwk3P1ZIiIiQyFuIMjKysLv98deG4aB0+nstczv9+P1ens89/v9ZGdnD6hu1G9+8xu++MUvxl5fccUVXH755QDMnDmT119/Pe4PWF5eTktLCy0tLZSXl8etPxiDXUMAcF6um4hh8sHBTquaJSIiElfcQFBcXEx9fT3QtbgvuqAPoLCwkIaGBgKBAO3t7ezcuZP8/HyKi4vZvHkzAPX19UyZMmVAdQHa29sJBoOce+65se/73ve+x3PPPQfASy+9xMUXX2xRN1gj+qf6k50yADhveCagdQQiIjK04i4qnDlzJlu2bGHevHmYpsny5ctZt24deXl5zJgxgwULFlBaWoppmixevJj09HTKysqorKykrq6O3NxcVq9eTWZmZr/rArz99tuMGTOmR1uWLFnCnXfeyZNPPonb7e7XlMFQChmDHyEYm+sG4L22wzDRkmaJiIjEZTNNUwfnW+SPb+zla0+8ymcuPoeS/JHHlbtddsIRMza1cKw0h40xw9yUPvoK5f98AUuuKUh0k0VERAAdTGSpwV5uBEemDLTTQEREhpICgYWsWFR4Tk4GTruN3fv98SuLiIhYRIHAQrGDiQaxqDDNYSdveCY7W/1oNkdERIaKAoGFoiMEgzmHAGDCSA8HD4do8wetaJaIiEhcCgQWOrKGYHCfM3Fk10FQu/Zp2kBERIaGAoGFjlx/PLhunTDSA8CuVt+g2yQiItIfCgQWio0QDG7GgAndIwQ7WzVCICIiQ0OBwEJhw5o1BLEpA40QiIjIEFEgsJAVRxcDDPe4GJaZxi6NEIiIyBBRILBQ0IJzCKImnOVhd1sHwbBuPRQRkcRTILBQ2IKTCqMmjswiYpj8vU1/2ixdAAAbpUlEQVSXHImISOIpEFgotoZgkFMGcGRhodYRiIjIUFAgsJAVdxlERbceaqeBiIgMBQUCC0UDwWB3GYB2GoiIyNBSILBQ9GAipwVTBnnDM3HYbTqtUEREhoQCgYWsussAwOXsuuRIIwQiIjIUnMluwOkkdtvhSQYCp8PG09v2xHYruNPsvN0R4vEtb+NJd+J02LmuaIxl7RUREYnSCIGFolMGgxkgCEcMQhGTUMRkhCcdgA8OdhKKmLGgICIiYjUFAgsFu39hOwd5uVHUWd6uQNDaHrDk80RERPqiQGChcGyXgTWfd1ZWdyDwKRCIiEhiKRBYKHow0WDvMoga2T1CsE8jBCIikmAKBBay8hwCAI/LgTvNoRECERFJOAUCC4UiJjasOboYwGazMTbXzT5fkA8PdVrymSIiIr1RILBQOGJYcmzx0S49fzgAf3mnzdLPFREROZoCgYVCEdPyQDDp3Gy86U7++vePdBWyiIgkjAKBhcKG9SMEDruNKefn0hkyeO29A5Z+toiISFTckwoNw2DZsmW0tLTgcrmoqqpi3LhxsfK6ujo2bNiA0+mkrKyM6dOn09bWRkVFBZ2dnYwaNYoVK1bgdrsHVLeqqoq//vWveDxdt/6tWbOGUCjUa91UEYqYlq0fONonzh/O5pZWXt6laQMREUmMuCMEmzZtIhgMUltby5IlS1i5cmWsrLW1lZqaGjZs2MBjjz3Ggw8+SDAYZM2aNcyePZv169czadIkamtrB1QXYMeOHTz66KPU1NRQU1OD1+vts26qCEUMnBaPEADkZrrIP9vL39s62PH+Qcs/X0REJG4gaGhoYNq0aQBMnjyZpqamWNn27dsp
KirC5XLh9XrJy8ujubm5x3tKSkrYunXrgOoahsHu3bu5++67mTdvHk899dRxbYnWTSXhiGnZlsNjXT6+a3Hh+lf+npDPFxGRM1vcQODz+cjKyoq9djgchMPhWJnX642VeTwefD5fj+cej4f29vYB1e3o6OArX/kKDzzwAI8++ijr16+nubm517rxVFdXU1BQQEFBAdXV1f3pk5OWiDUEUfnneBnmTuNX2/bgD4QT8h0iInLmihsIsrKy8Pv9sdeGYeB0Onst8/v9eL3eHs/9fj/Z2dkDqut2u7nhhhtwu91kZWVxxRVX0Nzc3GvdeMrLy2lpaaGlpYXy8vL+9MlJC0VMy04pPJbdZuPS83PxByP8sfnDhHyHiIicueIGguLiYurr6wFobGwkPz8/VlZYWEhDQwOBQID29nZ27txJfn4+xcXFbN68GYD6+nqmTJkyoLrvvPMOpaWlRCIRQqEQf/3rX7n44ot7rZtKQhEDRwL3bfzT2GEA/Hb7+4n7EhEROSPZTNM0T1QhusvgzTffxDRNli9fTn19PXl5ecyYMYO6ujpqa2sxTZOFCxdy7bXXsm/fPiorK/H7/eTm5rJ69WoyMzMHVPenP/0pv/vd70hLS+Pzn/888+fP77Nuqsj/7rOck5PBoqsm9lrudtkJd19tfDLlaQ4bP/3zLt7Z38Ff75pJVnrcTSIiIiL9EjcQSP+N/85vOX94JjeXJC4QvH+gk//Y9CY/nDeZz08eY1nbRUTkzKaDiSwSMUxM07qLjfoyq/AcAJ7Z/kFCv0dERM4sCgQWid50mKhdBlEXjPLysXO8bG5ppb0zlNDvEhGRM4cCgUWGKhAAzPr4uQQjBpve2Jvw7xIRkTODAoFFwt3z/onadni0fyk8F4DfatpAREQsokBgkZAxdCMEE0dm8bFzvNS/uY+DhzVtICIig6dAYJHoCEGiFxVGzS7smjb4o6YNRETEAgoEFomtIRiCKQOAz1zStdvgD68rEIiIyOApEFgkenbAUEwZQNe0wfizPGx+s5XOUGRIvlNERE5fCgQWCQ/hGgIAm83GNZPOpiMYYevOfUPynSIicvrS2bcWGYpdBk6Hjae37SHcPT3h7A4fP9m8i/2+IE6HneuKdHqhiIgMnEYILBLs/iWd6EWF4YhBqPt443OHuclKd7Lj/UMEwkYsKIiIiAyUAoFFoiMEziGaMoCuK5E/do4XfyDMu20dQ/a9IiJy+lEgsEh4iEYIjjVpdDYAr39waEi/V0RETi8KBBYJGUN3UuHRJo7MwuWw8/r7h9DFlSIicrIUCCwSjt1lMLTfm+awk392Fvv9QT5sDwztl4uIyGlDgcAiRy43GvouvejcrmmDLW/tH/LvFhGR04MCgUWOHEw09N998egcRnrTeWnXfn61bc/QN0BERE55CgQWiR1MNMRrCABcTjtfuXwcGU47d/xyO017Dg55G0RE5NSmQGCR0BBfbnSskd505l+WR2fIYGFNA23+YFLaISIipyYFAoscWUOQnEAAXVsQv331hew5cJh7frMjae0QEZFTjwKBRcJDfLlRX2795wu5cFQWz/7tHxzo0CiBiIj0jwKBRYb6+uO+2O02rp8ylmDE4JntHyS1LSIicupQILBI2EiNEQKA64rGYLfBxr++l+ymiIjIKUKBwCKhcPfRxUkeIQA4OzuDqRecxba/H2BXqy/ZzRERkVOAAoFFokcXD+XlRifyhSljAfjlX3UugYiIxOdMdgNOF8m63OhoToeNp7ftIRwxCIYN0p12fv7Kbs4b7sZus+F02LmuaEzS2iciIqlLIwQWSZU1BOGIQShiYrPZuGRMDgc6Qry510coYsZCi4iIyLHijhAYhsGyZctoaWnB5XJRVVXFuHHjYuV1dXVs2LABp9NJWVkZ06dPp62tjYqKCjo7Oxk1ahQrVqzA7XYPqO7jjz/Ob3/7WwCuuuoqvvnNb2KaJiUlJZx//vkATJ48mSVLliSmZwYoVXYZHK04
L5eG3R+x7e8HmDgyK9nNERGRFBY3EGzatIlgMEhtbS2NjY2sXLmShx9+GIDW1lZqamrYuHEjgUCA0tJSpk6dypo1a5g9ezZz585l7dq11NbWMmvWrH7XnTFjBr/+9a/5xS9+gc1mo7S0lKuvvhq3283FF1/MT37yk4R3zEClwsFExxo3IpPhHhevvXeAqReMIG94ZrKbJCIiKSrulEFDQwPTpk0Duv5E3tTUFCvbvn07RUVFuFwuvF4veXl5NDc393hPSUkJW7duHVDdc845h0cffRSHw4HdbiccDpOens6OHTvYu3cvCxYs4Oabb2bXrl2J6JOTkioHEx3NbrMxu/BcIobJL159T1MGIiLSp7iBwOfzkZV1ZLjZ4XAQDodjZV6vN1bm8Xjw+Xw9nns8Htrb2wdUNy0tjeHDh2OaJqtWrWLSpEmMHz+ekSNH8vWvf52amhoWLlzI0qVL4/6A1dXVFBQUUFBQQHV1dT+7ZeBCKRgIAD52TjafOD+Xfxzq5A+v7012c0REJEXFnTLIysrC7/fHXhuGgdPp7LXM7/fj9XpjzzMyMvD7/WRnZw+oLkAgEODOO+/E4/Hw/e9/H4BLLrkEh8MBwKWXXsrevXsxza4FdH0pLy+nvLx8IH1yUqK3HaZYHgDgXy45l7c+9PFCSysNu9uYMm54spskIiIpJu4IQXFxMfX19QA0NjaSn58fKyssLKShoYFAIEB7ezs7d+4kPz+f4uJiNm/eDEB9fT1TpkwZUF3TNPnGN75BQUEB9957bywEPPTQQzzxxBMANDc3M3r06BOGgaGUimsIotLTHHxhynkA3Fb3Gv5AOMktEhGRVGMzTdM8UYXoLoM333wT0zRZvnw59fX15OXlMWPGDOrq6qitrcU0TRYuXMi1117Lvn37qKysxO/3k5uby+rVq8nMzOx33S1btnDbbbcxefLkWDtuu+02JkyYwNKlS+no6MDhcHD33XczceLEhHdSf9z8/73KH17fyz3/Oom07gBzLLfLTjhixqYXhrr8uR3/YPObrXzlijyq5ny8Hz+ViIicKeIGAumf/7PuL7zQ0krVnIux23ofeEl2ILBh8vjW3bTsbeeJmy7jqvyR/fjJRETkTKCDiSwSO5goRaYweuN02Hnwy/9EmsPG7U+9puuRRUQkRoHAIqEUOLq4Py4encO3r85n76EAd/33jmQ3R0REUoQCgUVCERO7LTVuO4xnYckEivOG8ZvX3ufXr72f7OaIiEgKUCCwSDhikOY4NbrT6bDz4Jcm405zcNevmth7qDPZTRIRkSQ7NX6DnQJCEfOUCQQA55/l4buzLuLg4RC3P7UdrS0VETmz6fpji4QNA6cjtacLjr4eGcDlsJF/dhab32xlyS9eY9qFI3U9sojIGUqBwCKhiInTnvojBNHrkaOuKxrLj/74v/zmtfd5r62DZ157n7aOIHOLxrDgk+cnr6EiIjKkFAgsEooYuFJ8hKA3Oe40Pj95NBv+513+8s5HseevvXuAiaOy+NTEs5LYOhERGSoKBBYJR0xcztQfIehN4dhhnJOdQZrTxleuOJ+3Pmzny4+8zK1PNvL/vnUlo7wZyW6iiIgk2Kn5GywFnQprCE5kVHYGo7wZ5LjTmDJuOHd89mPs8wW49cltRAwtOBQROd1phMAiwbBB2imwhuBEjl50mJ3h5OLR2by8q43rH97KxaOzOW94Jv/3yvE4T6HdFCIi0j8KBBYJG+YpPUIQdfSiw7lFY/nHwbdofPcAje8eAOCRzTv5eslEbvzUODJd+tdHROR0of+jWyR8ip1D0B9ul4Pyf76Qdz/q4B8HO9l7qJM397az6nfNPPbiLr7x6Qu48VPnp+SVzyIiMjAKBBYJGQZpp8EIwbFcTjsTR2YxcWQWaQ4b11x8Do/9eRc/2/IO9z7zOn/bc5AHvlCoaQQRkVOc/i9ugYhhYpqcEucQDFaOO43brimg/vbpFOcN4+lte1hc91rssCMRETk1aYTA
AtGbDk+HNQQncuxJh9cVjeEjf5DfvPY+u/f5mX95HvMvy0tyK0VE5GQoEFggFDsK+PQfITh60aHDbueGT53PE1t3s33PQZqe/hsb/uddPjlhBHOLx5B/tjfJrRURkf5SILBAuPsX5Ok+QtCbdKeDr37qfP78v6281epjx56DvPbuAX7651185fI8Fs/MZ1imK9nNFBGROBQILBAyolMGp/8IQW9cTjszLjqbz1xyDv/y8XOpf7OVB55r4YmXdvPfr73Pkpn5zL8s74ztHxGRU4H+D22B6BB62hm+/c7psPH71/fiC4S5uWQ8swvPpTMY4a7/3sGVq17g/t81J7uJIiLSB40QWCAcObNHCI52ZI2BjU9NPIuPj8nhD6/vpWH3R6z5007e+OAQswpH8+mCkZyVlZ7s5oqISDcFAgvERggUCI7jzUhjbvFYLh8/gv/3t/d5oaWVF1pasdngn8YO41MTR3D5hBFcOi4XT7r+dRQRSRb9H9gC4e41BKfjwURWGZPrpnzGBXzYHuRv7x3kjQ8Osf29riOR1/xpJ3YbfHLiWfy4tEiLEEVEkkCBwALPvPYBAGdn65rgE7HZbIzwuJh6wVlMveAsAqEIu9s6eHufn6Y9B9ny1j6m/+BPVFxbwHVFY3RXgojIELKZpqm7bQfhb+8dZM6aLZyTncFzi0t49m8fxKYQjuV22QlHTJX3Uh42DF7Z1cafWj7EH4wAkJXuZKQ3ndHDMhg3wsP4ER7OycnAnebA7XLgSXcyelgGI7PSsdk0OiMiMhj6I9ggBMMGFb94jYhhcv8XCsnSHPhJc9rtXD1pFJ8YP5w/vr6Xve2dtHeGaW0P8PY+P1ve2t/ne11OO6NzMkh3OrDZunY7nJebySVjcpg0Opu84ZlkZ6SR7XaS7nQM4U8lInLqiPsbzDAMli1bRktLCy6Xi6qqKsaNGxcrr6urY8OGDTidTsrKypg+fTptbW1UVFTQ2dnJqFGjWLFiBW63O2F1k+Wh5/+Xlr3tlF6ex9QLzkpaO04nHpeDz3783B7PgmEDfzDEPl+Qj/xBQhGTUMSgMxThQEeIto4g+/1BIoaJYZiEDZOmPYd4tukfx32+N93JecMzGTcik9HD3GSk2XE5HLhddsYM63qeNyITgEOHQxw6HCbT5WD0MDcupxaNisjpK+6Uwe9//3uef/55Vq5cSWNjI4888ggPP/wwAK2trdx0001s3LiRQCBAaWkpGzdu5P7772fSpEnMnTuXtWvX4nK5mDVrVkLqfvWrXx2KfjrOWx/6uPY/6zknO4PffXsa3ow0AH7x6rspOSR/qpcP5L2maXLwcIj3D3Ty/sHDtHeGCEYMDgcNDh0Osd8f6PNz+mK3wbk5bs4b7iZveCZ5wzM5Kyudw6EIvs4wh0MRXE47mS4HbpcTd5qj++8dXVMc3dMcAB3BCB2BMBHTZJQ3g3OyM8h2Owc17WGapqZNRE5BwbBB2DBSYs1U3BY0NDQwbdo0ACZPnkxTU1OsbPv27RQVFeFyuXC5XOTl5dHc3ExDQwMLFy4EoKSkhAcffJDzzjsvIXWTFQg6QxHGDHOz6vrCWBiA6FkEvd/813Uboqnykygf2HttjPSmM9Kbzj+dlwNARpqDsGESjhiYpkl7IMzBjtCRZ0CbL8je9gD7/QHsNlvsF3lnOEKbP4g/EOHlXW28vKut1zYMhstpJ8Npx+V0kH7MSERvmd1msxExTDqCXWEkFDEZ7nExMiudEVkuwt1lHYEIJuC023A67LgcXX912m2kOew4HTacdhsOuw3TBMPs+j4TMEyz+1nX9xtHtcOGrbsdPdvUVdbzeY9203UHhsPe9c/Mbu/6frvN1ut7jn3Ue53jH/Zar9c22eLWOdVjVrJzYm//fIb0+5P49YZp0t4Z5kBHiEOdITKcDnLcaeS40/ioI8hbrT527+8gYpiMzsnggrO9XDoul1umX4AjCQfdxQ0EPp+PrKys2GuHw0E4HMbpdOLz+fB6
j1xg4/F48Pl8PZ57PB7a29sTVjee6upqHnroIQC++c1vUl5eHvc9/XHJmBzqb59+3PP3XvylZd9xpqmurlbfDYL67+Sp706e+m5wUqn/4k6KZmVl4ff7Y68Nw8DpdPZa5vf78Xq9PZ77/X6ys7MTVjee8vJyWlpaaGlpGZJOj4YPGTj13eCo/06e+u7kqe8GJ5X6L24gKC4upr6+HoDGxkby8/NjZYWFhTQ0NBAIBGhvb2fnzp3k5+dTXFzM5s2bAaivr2fKlCkJqysiIiKDF3fKYObMmWzZsoV58+ZhmibLly9n3bp15OXlMWPGDBYsWEBpaSmmabJ48WLS09MpKyujsrKSuro6cnNzWb16NZmZmQmpKyIiIoOng4kslkrzQaca9d3gqP9Onvru5KnvBieV+k+BQEREROKvIRAREZHTnwKBiIiIKBCIiIiIAoGIiIigQCAiIiLo+mPLxLsV8kwTCoW488472bNnD8FgkLKyMi644ALuuOMObDYbF154Id///vex2+089NBD/OlPf8LpdHLnnXdSWFjI7t27+133dLV//37mzp3Lz372M5xOp/puAB555BGef/55QqEQ8+fP57LLLlP/9UMoFOKOO+5gz5492O127rvvPv271w+vvfYaP/jBD6ipqRlQH1hR11KmWOK5554zKysrTdM0zW3btpmLFi1KcouS66mnnjKrqqpM0zTNtrY286qrrjIXLlxovvzyy6ZpmuZdd91l/v73vzebmprMBQsWmIZhmHv27DHnzp1rmqY5oLqno2AwaH7jG98wr7nmGvOtt95S3w3Ayy+/bC5cuNCMRCKmz+czf/SjH6n/+ukPf/iDeeutt5qmaZovvvii+c1vflN9F8fatWvN2bNnm1/84hdN0xxYHwy2rtU0ZWCRE90KeSb6zGc+w7e+9a3Ya4fDwY4dO7jsssuArtsqt27dSkNDA1deeSU2m43Ro0cTiURoa2sbUN3T0apVq5g3bx6jRo0CUN8NwIsvvkh+fj633HILixYt4tOf/rT6r5/Gjx9PJBLBMAx8Ph9Op1N9F0deXh7V1dWx14nqr97qWk2BwCJ93Qp5pvJ4PGRlZeHz+bj11lv59re/jWmasStyj77Z8uh+iz4fSN3TzS9/+UuGDx8eC5iA+m4APvroI5qamvjhD3/IPffcQ0VFhfqvnzIzM9mzZw+f/exnueuuu1iwYIH6Lo5rr702duEfJO6/1d7qWk1rCCxyolshz1QffPABt9xyC6WlpXzuc5/jgQceiJXFu9ny6LmxeHVPNxs3bsRms/HSSy/xxhtvUFlZ2eNPVOq7Exs2bBgTJkzA5XIxYcIE0tPT+cc//hErV//17fHHH+fKK69kyZIlfPDBB9x4442EQqFYufouvoH0wWDrWt52yz/xDHWiWyHPRPv27eOmm25i6dKlfOELXwBg0qRJvPLKK0DXbZWXXnopxcXFvPjiixiGwfvvv49hGAwfPnxAdU83//Vf/8XPf/5zampquOiii1i1ahUlJSXqu36aMmUKf/7znzFNk71793L48GE++clPqv/6ITs7O/bLOicnh3A4rP9uByhR/dVbXavpLgOLRHcZvPnmm7FbISdOnJjsZiVNVVUVzz77LBMmTIg9++53v0tVVRWhUIgJEyZQVVWFw+Ggurqa+vp6DMPgO9/5Dpdeeilvv/02d911V7/qns4WLFjAsmXLsNvt/e4P9R3cf//9vPLKK7HbUseOHav+6we/38+dd95Ja2sroVCIG264gUsuuUR9F8d7773HbbfdRl1d3YD6wIq6VlIgEBEREU0ZiIiIiAKBiIiIoEAgIiIiKBCIiIgICgQiIiKCAoGIJNAf//hHfvjDHya7GSLSD9p2KCIiIjq6WEROziuvvMKaNWtwOp289957FBYWUlZWxje+8Q1yc3PJyMjgc5/7HH/5y19YuXIlW7duZeXKlZimyejRo1m9ejVut5v777+fv/zlL0QiEebOnctXv/rV
ZP9oImckBQIROWnbtm3jV7/6FePHj+db3/oWmzdv5u233+bRRx9l7Nix/PKXvwQgGAxSUVHBY489xkUXXcTq1at5+umnY/d9PP300wSDQb72ta9xySWXnHYn2YmcChQIROSkfeITn4gdT/35z3+euro6RowYwdixY3vUa2lp4eyzz+aiiy4CYMmSJQDceuutvPHGG7z88ssAdHR00NLSokAgkgQKBCJy0o4+S900TRwOBxkZGcfVS0tLi13dCtDe3o7f7ycSibB06VKuueYaANra2vB4PIlvuIgcR7sMROSkNTQ0sHfvXgzD4Fe/+hUlJSW91hs/fjz79+/nrbfeAuDRRx/lySef5IorrqCuro5QKITf76e0tJTGxsah/BFEpJtGCETkpI0aNYrbb7+dvXv3MnXqVD71qU+xdu3a4+qlp6fzwAMPcPvttxMKhcjLy+P+++/H5XKxe/durrvuOsLhMHPnzuXyyy9Pwk8iItp2KCIn5ZVXXuGhhx6ipqYm2U0REQtoykBEREQ0QiAiIiIaIRAREREUCERERAQFAhEREUGBQERERFAgEBERERQIREREBPj/AZMRw+TqNJ9UAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 576x396 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.distplot(train_y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "从标签分布可以看出不服从正态分布，采用box-cox使其正态化。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc_y_train, maxlog= stats.boxcox(train_y) #bc_y 是box-cox后的数据，lamda是变换参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x25f85624808>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfMAAAFQCAYAAACrh5cLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xl8lOW9///XrNkmK1lYEyCQsIlhcSsCIoI91aqVoyA16re2Uk9Ne5Ra/dlTay0inlPst43Vo0fN6YlVoC7H1vZbLRUJsqhEwhIgQIAAAbISyEzIMjP374+QyBIYliR3ZvJ+Ph48ZOa+78nnEpI313Vf93VZDMMwEBERkaBlNbsAERERuTQKcxERkSCnMBcREQlyCnMREZEgpzAXEREJcgpzERGRIKcwFxERCXIKcxERkSCnMBcREQlyCnMREZEgpzAXEREJcgpzERGRIGcPdILf7+epp56ipKQEp9PJggULSEtLaz/+3//93/zlL38BYOrUqTz00EM0Njby6KOPUlNTQ1RUFM899xwJCQl8/PHH/O53v8NutzNr1izuvPPOrmuZiIhILxGwZ758+XKam5tZunQp8+fPZ9GiRe3H9u/fz5/+9CeWLFnC0qVL+fTTT9m+fTtvvfUWGRkZvPnmm9x22228+OKLtLS08Oyzz/L666+Tn5/P0qVLqaqq6tLGiYiI9AYBw7ywsJDJkycDkJWVxZYtW9qP9e3bl1dffRWbzYbVasXr9RIWFnbKNVOmTGHt2rWUlpaSmppKbGwsTqeTCRMmsH79+i5qloiISO8RMMzdbjcul6v9tc1mw+v1AuBwOEhISMAwDJ577jlGjRrFkCFDcLvdREdHAxAVFUV9ff0p77W973a7AxaYm5tLZmYmmZmZ5ObmXnADRUREQl3Ae+YulwuPx9P+2u/3Y7d/dVlTUxNPPPEEUVFR/PznPz/jGo/HQ0xMzBmf4/F4Tgn3s8nJySEnJ+f8WyQiItLLBOyZjx8/noKCAgCKiorIyMhoP2YYBv/yL/9CZmYmTz/9NDabrf2alStXAlBQUMCECRNIT0+nrKyMuro6mpubWb9+PePGjeuKNomIiPQqFsMwjHOd0DabfceOHRiGwcKFCykoKCA1NRW/388jjzxCVlZW+/mPPPIII0aM4LHHHqOqqgqHw8HixYtJSkpqn81uGAazZs3i29/+dpc3UEREJNQFDHMRERHp2bRojIiISJBTmIuIiAQ5hbmIiEiQU5iLiIgEuYDPmYuIXIr3NpTj9fnPeN9us/KtcQNMqEgk9CjMRaRLeX1+WnwdPTRzZsCLyMXRMLuIiEiQU5iLiIgEOYW5iIhIkFOYi4iIBDmFuYiISJBTmIuIiAQ5hbmIiEiQU5iLiIgEOYW5iIhIkFOYi4iIBDmFuYiISJBTmIuIiAQ5hbmIiEiQU5iLiIgEOYW5iIhIkFOYi4iIBDmFuYiISJCzm12AiPR8720ox+vzn/G+3WblW+MGmFCRiJxMYS4iAXl9flp8RgdHzgx4Eel+GmYXEREJcgpzERGRIKdhdpFupvvPItLZAoa53+/nqaeeoqSkBKfTyYIFC0hLSzvlnNraWubMmcOf//xnwsLCeOWVV1i1ahUAx44do7q6mtWrV5OXl8fbb79NQkICAL/4xS8YOnRoFzRLpOfS/WcR6WwBw3z58uU0NzezdOlSioqKWLRoES+99FL78VWrVrF48WKqq6vb33vggQd44IEHAJg3bx4//vGPASguLua5555jzJgxnd0OERGRXivgPfPCwkImT54MQFZWFlu2bDn1A6xW8vLyiIuLO+Pajz76iJiYmPbri4uLeeWVV7jrrrt4+eWXO6N+ERGRXi9gz9ztduNyudpf22w2vF4vdnvrpZMmTTrrtS+//DLPP/98++ubbrqJuXPn4nK5eOihh1ixYgXTpk0759fPzc3lhRdeAOChhx4iJycn
UMki0gHdqxcJXQHD3OVy4fF42l/7/f72ID+XXbt2ERMT035/3TAM7r33XqKjowGYOnUqW7duDRjmOTk5CnCRTqB79SKhK+Aw+/jx4ykoKACgqKiIjIyM8/rgNWvWMGXKlPbXbrebm2++GY/Hg2EYfPbZZ7p3LiIi0gkCdrFnzJjB6tWrmTNnDoZhsHDhQvLy8khNTWX69OlnvW7Pnj2nDMFHR0fz8MMPc8899+B0OrnmmmuYOnVq57RCRESkF7MYhtHRuJuIdJE/rt/f4XC3w2bhjomDeuTXNetaETk/WgFOREQkyCnMRUREgpzCXEQu2BFPM8ebfWaXISInaG12Ebkg2w8fI39tGQaQ6HKyelc1c65M5eqhfcwuTaTXUs9cRM5braeZZev3Y7NaGJbkwt3k5X+LDnLP65/z2e4as8sT6bXUMxeR89Li8/Pm52U0tvi5fdwAJg5OwGaFPlFhfP+NQr77P+t5+/tfI7NvtNmlivQ66pmLyHn5YNNBDtY1MiEtnomDW3c+tFos3DAqhV/dcTn1jV7uff1zDtYdN7lSkd5HYS4iARUfPMoXe4/QLzacWy7vf8bx28YN4IlvjODwsUbu+q91FO2vM6FKkd5LYS4i52QYBsu3VWIB7pw4CIet4x8b35s8lB9eP4yymgZmvbSGxR+V0OzVuu8i3UH3zEXknNaU1nDgyHFG948hJSb8rOdZLBYemZnJ1el9ePSPm8j9eBd/2XyIgXERDIiPJK1PJDHhjm6sXKT3UJiLyDm9+MkuAKZmJJ3X+V9LT+TDh6fwzF+2sWz9fnZXte66aAFuyxrAFUMSuqpUkV5Lw+wiclYb99exelcNw5NdDIyPPO/rXGF2nr39MjY/NZPvTx3KjaNSiHDa+N+icjaXH+3CikV6J4W5iJzVf64sBWBa5vn1yk8X6bSTnuRiamYy/+drQ3DarSz7Yj87K+s7s0yRXk9hLiIdKq1y87fiw1w+MJZhya5L/rwB8RFkX52GxQJ/WLePA0caOqFKEQGFuYicxeKPSjAM+P7UdCwWS6d85tAkF3OuGESzz8+fNx7qlM8UEYW5iHRg+dYK/rr5MBPS4rlxdN9O/exR/WPJSHGxu9pDYdmRTv1skd5KYS4ip3A3efnZ+1tw2Cw8e/tlWK2d0ys/2ZQTM+Pb7smLyKVRmIv0AoZhcKShmer6Jqrqm6h2N+HzGx2e+6sPSzh0tJEHp6aTkdI166wP6RNFWkIkf99awc4KTYYTuVR6zlwkRPn9BuvLjvBR8WE+2lrBvtpTJ5zFRzr4Wnoi16S3Pvft8xusLa3h92v3MjQpin+ZNqzLarNYLEwbkcR/rynjP1fuZvGdl3fZ1xLpDRTmIiGo4lgjDy8tYk1p67akrjA7YwbEEG63YbFAk9fPtkPH+MvmQ3y8vZI31u1jd7WnffnVhd+6jHCHrUtrHNkvhuHJLt4vKueRmRkMiIvo0q8nEsoU5iIhZkVJJfOXbaTW08y0zCTu/dpgrknvw5+KDtLi+2po3dPk5bM9NazbXcu+2gYyU6IZluxixqgUrh7ap8vrtFosfH9qOvP/uJFXV+3m598c3eVfUyRUKcxFQsR7G8pZvrWCv2w+hM1q4bas/nwtvQ9V9U18VFxxxvlRYXauH5HCzFEp/POEQV0y0S2QW7L6s/ijEpZ8vp+c64eTEOXs9hpEQoEmwImEiMpjjfyt+DAx4XYenJrOlUP64PVDi8/A6z/77mUWi8WUIAdw2Kx8b8pQjrf4+P2avabUIBIKFOYiIeJvWw7j8xt8fUw/+gfR/efZVwwiPtLB79fuxdPkNbsckaCkMBcJAVvKj7Jhfx3948IZOzDW7HIuSKTTzn1fG0JdQwtLvthvdjkiQUn3zEWCnGEYLPzrNgD+aUw/rJ209Or5sNssvLehHK+v42H8850Rf881abxcUMqrq3aTfXUaTrv6GSIXQt8xIkFu5Y4q1pTWkJkSTXrSpW+IcqG8Pj8tPqPDX+e6
V3+y+Cgnd12ZyqGjjbxfVN7FFYuEHoW5SBAzDIN//1sJFgt847LOXUO9u3138hAcNgsvfVLa/ry7iJwfhblIENt04ChbDx3jn8b0DapJbx3pFxvBXVemsrvaw8tas13kggQMc7/fz5NPPsns2bPJzs6mrKzsjHNqa2uZOXMmTU1NQGtvYfLkyWRnZ5Odnc3ixYsB+Pjjj5k1axazZ89m2bJlndwUkd7n3S8PAHDHhEEmV9I5fnxjJsnRYeSu2MXuKrfZ5YgEjYBhvnz5cpqbm1m6dCnz589n0aJFpxxftWoV3/nOd6iurm5/b9++fYwePZr8/Hzy8/OZP38+LS0tPPvss7z++uvk5+ezdOlSqqqqOr9FIr1Es9fPnzYeJNHlZPLwRLPL6RQx4Q5+cctomr1+nnhvM4bR8WYwInKqgGFeWFjI5MmTAcjKymLLli2nfoDVSl5eHnFxce3vFRcXU1FRQXZ2Nt/73vfYvXs3paWlpKamEhsbi9PpZMKECaxfv76TmyPSe3xSUsmRhhZuzRqA3RY6d8y+PqYvN4xMYd3uWv5YeMDsckSCQsCfAG63G5frqxmyNpsNr/erhR0mTZpEfHz8KdckJSXxwAMPkJ+fz7x583j00Udxu91ER3+1nWJUVBRud+BhtNzcXDIzM8nMzCQ3N/e8GiXSG7z7Zeus79vHDzC5ks5lsVh4+tbRRDltPPOXbeyraQh8kUgvFzDMXS4XHo+n/bXf78duP/fj6WPGjGH69OkATJw4kYqKijM+x+PxnBLuZ5OTk0NJSQklJSXk5OQEPF+kN6hraOYf2ysY0TeaUf1izC6n0/WPi+BnN4/i6PEW7sv7nCOeZrNLEunRAob5+PHjKSgoAKCoqIiMjIyAH/rCCy/w+9//HoDt27fTv39/0tPTKSsro66ujubmZtavX8+4ceMusXyR3unPmw7R4jO4ffwALN24SEx3mnNlKvOmDmV3tYfv/s96Glt8Zpck0mMFXAFuxowZrF69mjlz5rSuNLVwIXl5eaSmprb3vk/3wAMP8Oijj7Jy5UpsNhvPPvssDoeDxx9/nPvvvx/DMJg1axYpKSmd3iCR3uDdLw9gtcCtWaE1xH66jJRosgbFUVh2hH/+zzV8+6pU7FYrdpuVb40L7baLXIiAYW61Wnn66adPeS89Pf2M8z7++OP238fGxvLKK6+ccc7111/P9ddffzF1isgJe6s9bNhXx+ThiaTEhJtdTpfy+w1uHzeAo8db2FJ+jEX/r4TJwxO5phv2WxcJJlqbXSTI/HnjQQBuC/FeeRu7zUr21Wn8Y1sFn++t5YNNh/ikpIqPtlYwsl80g/tEYTttC1f13KW3UZiLBBHDMPjTxoM47VZmju49t6nCHTZuGtufqZnJrCmtZt3uGlbuqGLljioinTamZiQxeXjSSVdoOVjpXRTmIkGkpKKenZVuvj66L9HhDrPL6XauMDszR/Xlpsv6UnLYzabyo2wpP8r/23KYFp+f60f0nn/giJxMYS4SRNqG2L95eX+TKzGX3WYls280Q5NcTM1I4tVVu1m+rRKLxcK0zGSzyxPpdqGzbJRIiDMMgz9vPESU08b1IxRYbeIjnXz32qHERTj4+9YKCnZomWjpfRTmIkFi44Gj7KttYMaoFCKcNrPL6VHio5x8d/JQYiMcfFh8mENHj5tdkki3UpiLBAkNsZ9bQpST27IGYADvFx3UJi3SqyjMRYKA32/wwaaDxEY4Tpu1LSfL7BtNZko0pVUePiyuMLsckW6jMBcJAut211BxrIl/GtMXp13ftufyjcv6YbXAM3/dqiVgpdfQTwWRIPC7T3YBcMfEQSZX0vMlRYdx7bBE9tce57VP95hdjki3UJiL9HBf7K1l9a4aJg9PZEJafOALhBtGpdAnysnvVuyisr7R7HJEupzCXKSH+83ynQD8aPpwkysJHhEOG/86I4OGZh//+clus8sR6XIKc5EerLCslk93VXPtsEQmDk4wu5yg
MnviIAbERfDGZ2VUHFPvXEKbwlykB/u/bb3yG9Qrv1BOu5Wc64fR7PXz4opdZpcj0qUU5iI91IqSSlbtrGbSsD5coV75RZk1YSCpCZG89fl+yuu0kIyELq3NLtLD7Ktp4LkPt/OXTYewWODhGzLMLino2G0W3ttQjtfn55qhCSxdf4D5y4qYNX7giePaIlVCi8JcpIfwGwa//cdOcj/eSYvP4PKBsfz0plEhe6/85MDtSLjj0pas9fr8tPgMxgyIY/m2Sj7fU8vkYUnERznRFqkSahTmIj1As9fPuxsOsOnAUfrHhvP4N0Zy82X9sFotZpfWpdoCtyN2W+cErs1qYfrIZJatP8DHJZXtvXORUKIwFzHZscYW8teWUV53nCsHJ/Cf2RNIiHJ229fv6h5yTzB2YBwrSqrYsO8I12UkMTAh4qxt1hC8BCNNgBMxkWEYvP7pHsrrjjMxLZ78717ZrUHepq2H3NEvrz/4h6StFgvTRyTjN+Dj7ZXA2dt8tn/UiPRkCnMRE+2rbaCyvokxA2K5c+JAwuzB3wvuqcYMiCUlJoyi/XVU6rlzCTEKcxETbTxwFICJafFYLKF9f9xsrb3zFAzQjmoSchTmIibx+Q02lx8l0mkjPclldjm9wuj+MfSPDWfDvjoOH1XvXEKHwlzEJHuqPXiavIwZEIstxGet9xQWi4XpI1t753/fpt65hA6FuYhJNh6oA+DygXEmV9K7jOgbTWpCBJsOHOXQUa0KJ6FBYS5iAq/PT/HBo8SE20nrE2l2Ob2KxWLh62P6AvCPbZUmVyPSOfScuYgJdlS4aWzxMzEtAesFTHw727PRofAseHca0TeatIRIth46RvmR4wyIjzC7JJFLop65iAnahtjHDoy9oOvO+mx0CDwL3p0sFgszR6cAsFz3ziUEKMxFulmz18/2w8foE+VkQJx6hGYZnuxicJ9ISirq2V/bYHY5IpckYJj7/X6efPJJZs+eTXZ2NmVlZWecU1tby8yZM2lqagKgvr6e73//+9x9993Mnj2bDRs2APDRRx9xww03kJ2dTXZ2Np9//nknN0ek5yur8dDiMxjVL0bPlpvIYrFww6jW3vknO6pMrkbk0gS8Z758+XKam5tZunQpRUVFLFq0iJdeeqn9+KpVq1i8eDHV1dXt7+Xl5XH11Vdz3333sXv3bubPn897771HcXExjz76KDfeeGPXtEYkCOytae0FDk6MMrkSGZrool9sOCWHj+Fu8uIK0zQiCU4Be+aFhYVMnjwZgKysLLZs2XLqB1it5OXlERf31eM19913H3PmzAHA5/MRFhYGQHFxMe+88w5z585l0aJFeL3eTmuISLDYU+0BIDVBs9h7gnGp8fgN2HRiHoNIMAoY5m63G5frq9WpbDbbKSE8adIk4uPjT7kmJiaG8PBwqqqqePTRR3nkkUfaz/3Zz37GH/7wBxoaGliyZEnAAnNzc8nMzCQzM5Pc3NzzbphIT+T1+dlX20CSK4wo9QJ7hMsHxmK1wIZ9CnMJXgF/mrhcLjweT/trv9+P3R74h1BJSQmPPPIIP/nJT7jyyisBmDVrFjExMQBMnz6dDz/8MODn5OTkkJOTE/A8kWCw/XA9TV6/ni3vQaLDHQxPjqakop6KY40M1GNqEoQC9szHjx9PQUEBAEVFRWRkZAT80F27dvGjH/2IxYsXM3XqVKB1q8dbbrmFw4cPA7B27VpGjx59KbWLBJ31e2sBSOuj++U9ybjU1tuERfvVO5fgFLCLPWPGDFavXs2cOXMwDIOFCxeSl5dHamoq06dP7/CaxYsX09zczDPPPAO09u5feuklFixYwEMPPUR4eDjp6enceeedndsakR7ui7IjAAxWz7xHGdkvhnCHlaL9dXzjsr5mlyNywQKGudVq5emnnz7lvfT09DPO+/jjj9t/f/Js95Nde+21XHvttRdao0hIMAyD9XtrcYXZSYhyml2OnMRhs3LZgFi+2HuE0kq32eWIXDAtGiPS
TQ4cOU7FsSaGJEbp+fIeaNyg1om8hZoIJ0FIYS7STdaXtd4v1xB7z5TWJ5L4SAdbyo/S7NXyuBJcFOYi3eSLva33y4dosZgeyWKxMKJvDE1ef/s/vESChcJcpJsU7j1ChMNGf63H3mNl9o0G4JMSLe8qwUVhLtINjja0UFJRT9agOGxW3S/vqYYkRuGwWVixXfucS3BRmIt0g8J9rcO2VwyOD3CmmMlhszIsycXOSrd2UpOgojAX6QZt98snDk4wuRIJZES/E0Pt2klNgojCXKQbFO49gtXy1Upj0nON6Nu65PQnGmqXIKIwF+liTV4fRQfqGNE3huhwh9nlSAAJUU7Sk6JYU1pDY4vP7HJEzovCXKSLtT23rPvlwWNaZjLHW3x8vkePqElw0B6MIp3svQ3leH1fLTqyoqR1uNbrN/jzxoNmlSUXYNqIZF79dA8rSiqZkpFkdjkiAalnLtLJvD4/LT6j/deeqtYthAfGR+L1a2WxYDBxcDxRTpueN5egoTAX6UJ+w6CstoG4SAexEbpfHizC7Da+NiyRPdUe9lZ7zC5HJCCFuUgXqq5voqHZx2DtXx50pp4YXl+1U71z6fkU5iJdqOzEwiNp2lwl6EwZ3hrmBTurTa5EJDCFuUgXKqtpHaJNS1DPPNik9olkcJ9I1pbW0OLTXAfp2RTmIl1ob00D4Q4ryTFhZpciF2Hy8CTcTV42aI9z6eEU5iJdpL6xhVpPM2kJUVgt2lwlGLU9llagpV2lh1OYi3SRshrdLw92Vw9NwG61aBKc9HgKc5Eusrftfrlmsget6HAH49Pi2VR+lCOeZrPLETkrrQAn0kX2VHuwWy0MjI84r/PtNssZq8edLNxh68zy5DxNGZ7I53tq+XRXNd+8vL/Z5Yh0SD1zkS7Q0Ozl8NFGBiVE4rCd/7fZ6avHnfxLq8eZY4qeN5cgoDAX6QJ7qxswgKGJGmIPdqP7xxIf6aBgRzWGYZhdjkiHFOYiXWBPtRuAIQrzoGezWrh2eBKHjzWys9JtdjkiHVKYi3SBtvvlgxI0kz0UXHdiqP3vWytMrkSkY5oAJ9LJGpq9HDrayODEqAu6Xy49Q0cTERuavVgtsOTzffSPi+Bb4waYWKHImRTmIp2s7X65htiDV9tExDYOm40hiVGUVnmorm80sTKRjqnbINLJSqt0vzwUje4fC0DxwWMmVyJypoBh7vf7efLJJ5k9ezbZ2dmUlZWdcU5tbS0zZ86kqakJgMbGRnJycpg7dy7f+973qK2tBeDjjz9m1qxZzJ49m2XLlnVyU0R6htIqDzarhVTdLw8pI/vFALC5/KjJlYicKWCYL1++nObmZpYuXcr8+fNZtGjRKcdXrVrFd77zHaqrv9om8K233iIjI4M333yT2267jRdffJGWlhaeffZZXn/9dfLz81m6dClVVXpuU0LL0eMtHKw7zqD4CN0vDzGxEQ4GxUewp9qj1eCkxwn406awsJDJkycDkJWVxZYtW079AKuVvLw84uLiOrxmypQprF27ltLSUlJTU4mNjcXpdDJhwgTWr1/fmW0RMd36vbUn7pe7zC5FusCo/rH4DVi+TbPapWcJGOZutxuX66sfTDabDa/X2/560qRJxMfHn3FNdHQ0AFFRUdTX15/yXtv7bree2ZTQ8tme1ltKul8emkafGGr/sPiwyZWInCpgmLtcLjweT/trv9+P3X7uSfAnX+PxeIiJiTnjczwezynhfja5ublkZmaSmZlJbm5uwPNFzFSwowq77peHrMToMFJiwijYWY2nyRv4ApFuEjDMx48fT0FBAQBFRUVkZGQE/NDx48ezcuVKAAoKCpgwYQLp6emUlZVRV1dHc3Mz69evZ9y4cQE/Kycnh5KSEkpKSsjJyQl4vohZDhxpYPvheoYlu3Dadb88VI3pH0uz188nJZrzIz1HwOfMZ8yYwerVq5kzZw6GYbBw4ULy8vJITU1l+vTpHV5z11138dhjj3HXXXfhcDhYvHgx
DoeDxx9/nPvvvx/DMJg1axYpKSmd3iARs6zYXgnAyH6BR5wkeI0dGMs/tlfy7pcHuGlsP7PLEQHAYmjnAJFOcV/e53xSUsUT3xiBK8zR4TkRTiveE7ugXcgxXdt5117qZztsFt74bB+bD9Tx6WPX0z/u/La4FelKGgsU6QQNzV7WlNYwom808ZFOs8uRLjb3ykH4DVi2fr/ZpYgACnORTvHpzmqavX6mj0w2uxTpBjeP7Y8rzM7SL/bj82twU8ynMBfpBP/Y1nq/fPpIzQPpDaLC7Nya1Z9DRxtZuaPS7HJEFOYil8rvN/i4pJI+UU4uHxgX+AIJCXddmQrAm59pqF3MpzAXuURbDh6lqr6J6zKTsVktZpcj3WTMgFjGDozl4+0VHD6qndTEXApzkUu0/MQQ+w26X97r3HVlKn4Dln6h3rmYS2EucomWb63AYbNw7fBEs0uRbvbNy/sTHW4nf91ejjf7zC5HejGFucgl2HSgjq2HjjE1I4no8I6fLZfQ5Qqzc+81g6l2N7Pki31mlyO9mMJc5BL8z9oyAO6+Os3kSsQs37l2CBEOG68U7KbZ6ze7HOmlFOYiF+mIp5k/bzzI4D6RTBmeZHY5YpKEKCdzr0rl0NFG3v3ygNnlSC+lMBe5SMvW76fJ6+fuq9OwahZ7r/bAlKE4bVZeWlmK16feuXQ/hbnIRfD5DfLXlRHusHLHhEFmlyMmS4kJ546JAymraeCDTYfMLkd6oYC7ponImT4pqeTAkePMuWIQsZGa+Nab2G0W3ttQfkYPPDUhEqsFfrdiF7dc3l+jNdKt1DMXuQhtE9+yr9HEt97I6/PTcmJntbZf0eEOJqTFs7PSzV82q3cu3UthLnKBdlW6Wbmjiglp8YzuH2t2OdKDXD+idRXA3/xjpzZgkW6lYXaRDnQ0jNrmj+tbZyx/b/KQ7ixJgkCiK4xZ4wewbP0BPth0kFuzBphdkvQS6pmLdKCjYdQWn0FVfRPry2pJT4pi5qi+ZpcpPdBD04Zjt1r4rXrn0o3UMxe5AJ/uqsZvwPenpmuCk5zBbrNQuO8I41Pj+HzvEX72v5sZlxp/0nEr3xqn3rp0PvXMRc6Tp8nLF3triY1waPhUzsrr8zM1IxmrBT7aWkmT96v+7S9AAAAgAElEQVRRHj2DLl1FYS5yntaU1tDiM5iakYTTrm8dObv4KCfjU+Opdjex/dAxs8uRXkA/kUTOQ1OLj3W7a4h02rhqSILZ5UgQmDSsdRe9T3dVm1yJ9Aa6Zy5yHrYdPsbxFh/Xj0gmMsx21tnu4Q6bCdVJT5QSE05GiosdFW4OHGlgYHyk2SVJCFPPXOQ87KxwAzCqXwxw9tnuXr/uicpX2nrnq9U7ly6mMBcJwDAMdlW5iXLa6BsbbnY5EkSGJblIiQljc/lR6hqazS5HQpjCXCSAyvom6hu9pCe7sFr0OJqcP4vFwqT0RPwGrNtdY3Y5EsIU5iIB7KpsHWIfnuwyuRIJRpcPiiMqzM7ne2tp8vrMLkdClMJcJIC2MB+WHG1yJRKMHDYrVw1JoLHFz8b9R80uR0KUwlzkHLx+P3uqPSS5woiN0FancnEmnFgFrrDsiMmVSKhSmIucw77aBpp9foZpiF0uQXyUkyGJUeyu9rC/tsHsciQEBXzO3O/389RTT1FSUoLT6WTBggWkpX21h/OyZctYsmQJdrudBx98kGnTpvHMM8+wfft2AKqqqoiJiWHZsmUsWLCAL7/8kqioKABefPFFoqM1dCk9l+6XS2cZNyiOPdUe3i8q56Hrh5tdjoSYgGG+fPlympubWbp0KUVFRSxatIiXXnoJaA3q/Px83nnnHZqampg7dy6TJk3ipz/9KQAtLS3MnTuXX/7ylwAUFxfz6quvkpCgFbQkOOyqdGO1wJDEKLNLkSA3ZkAsf9p4kHe/LOcH04Zh0ZMR0okCDrMXFhYyefJkALKystiyZUv7sU2bNjFu3DicTifR0dGkpqa298gB3njjDSZN
mkRmZiZ+v5+ysjKefPJJ5syZw9tvv90FzRHpPA3NXsqPHCc1IZIwrewmlyjcYWPMgBh2V3so2l9ndjkSYgKGudvtxuX6aojRZrPh9Xrbj508TB4VFYXb3Tos2dzczJIlS7j//vsBaGho4O677+Y//uM/ePXVV3nzzTdPCf6zyc3NJTMzk8zMTHJzcy+sdSKXoLTKgwG6Xy6dpm0i3LtflptciYSagGHucrnweDztr/1+P3a7vcNjHo+nPdzXrl3LFVdc0f46IiKCe+65h4iICFwuF1dfffV5hXlOTg4lJSWUlJSQk5NzYa0TuQRtE5WGJCrMpXMMT4km0RXGnzcdpNmrpX+l8wQM8/Hjx1NQUABAUVERGRkZ7cfGjh1LYWEhTU1N1NfXU1pa2n58zZo1TJkypf3cvXv3MnfuXHw+Hy0tLXz55ZeMHj26s9sj0mkqjjUC0DdGS7hK57BZLdyW1Z+6hhZWlFSaXY6EkIAT4GbMmMHq1auZM2cOhmGwcOFC8vLySE1NZfr06WRnZzN37lwMw+Dhhx8mLCwMgD179nDbbbe1f056ejrf/OY3ufPOO3E4HNx6660MH64ZndJzVdY3ERNuJ8Kp++XSeW7NGsCrn+7hb1sOc+PovmaXIyHCYhiGYXYRIj3N/6zZy5N/KmZ4sov/M2nIKccinFa8J3ZJO925junannGtmXU5bBb+ecJAJi36GHeTl8KfzcBh03Ifcun0t0ikA21D7CkaYpdOZrFYmDEqhWONXr7YU2t2ORIiFOYiHTh8IsyTo8NMrkRC0YxRrcPrH22tMLkSCRUB75mL9EbqmUtXsNssvLehnKYWH+EOK/9bVM6oftFYLBbsNivfGjfA7BIlSKlnLtKBw8eaAEiOUc9cOpfX58dvQGZKNHUNLeyrPU6Lz8Dr06NqcvEU5iIdOHy0kfhIB2F2zWSXrjGyXwwAWw8dM7kSCQUKc5HT1HqacTd5SY7WELt0nYyUaGxWC9sU5tIJFOYip9lRUQ/ofrl0rXCHjfSkKA4dbeSIp9nsciTIKcxFTrOzPcx1v1y6VttQ+7bD6p3LpVGYi5ymRD1z6SYj+7aG+fbD9SZXIsFOYS5ymh0VbixAkp4xly4WE+GgX2w4e6s92nhFLonCXOQkhmGwo6KePi6nltmUbjE82YXXb7C72hP4ZJGz0E8rkZNUuZuoa2jRELt0m+EprdtE79BQu1wChbnISXYcdgPa9lS6T1pCJA6bpX2uhsjFUJiLnKTtsbS+sQpz6R52m5WhiS4q65sorztudjkSpBTmIifZWanH0qT7DU9xAVCwo8rkSiRYKcxFTlJyuB671aKZ7NKtMpJb75srzOViKcxFTjAMg50VbgYnRmG36ltDuk8fl5P4SAef7qrWhityUfQTS+SEQ0cbqW/yknlidrFId7FYLGT2jaa+0cvGA3VmlyNBSGEuckLb5Le2+5ci3SnjxD8iV+6oNrkSCUYKc5ET2sJcPXMxw7BkFzarRffN5aIozEVO2FHR+oz5cIW5mCDCYWPcoDg2HajjWGOL2eVIkFGYi5ywo6Iep83K4D6RZpcivdSkYYn4DVhXWmN2KRJkFOYigN/fOpN9aFIUdq3JLiaZNCwRgNW7dN9cLox+aokA5XXHOd7ia5+EJGKGrEFxRDptrFbPXC6QwlyE1sViADL7KszFPE67lauGJLCr0s3ho41mlyNBRGEuAuw4sYzr8GQ9libm0lC7XAyFuQhfbT+pnrmYTWEuF0NhLkLrY2nhDiuD4jWTXcyVmRJNosvJ6tJqDMMwuxwJEgpz6fV8foNdVW6GJ0djtVrMLkd6OavVwtfSE6k41kRpldvsciRI2AOd4Pf7eeqppygpKcHpdLJgwQLS0tLajy9btowlS5Zgt9t58MEHmTZtGnV1ddx4441kZGQAcMMNN3Dvvfd2eK6I2cpqPDR7/VrGVUxlt1l4b0M5Xp+fMHtrPyv3411ce2LY3W6z8q1xA8wsUXqwgGG+
fPlympubWbp0KUVFRSxatIiXXnoJgKqqKvLz83nnnXdoampi7ty5TJo0ia1bt3LzzTfzs5/9rP1zznau0+nsutaJnIe2ZVz1WJqYzevz0+IzGJwYBbTe/rlqSJ8TR7WbmpxdwGH2wsJCJk+eDEBWVhZbtmxpP7Zp0ybGjRuH0+kkOjqa1NRUtm/fzpYtWyguLubuu+/mhz/8IZWVlWc9V8Rsbcu4ak126SniI530iXKyu8qNz6/75hJYwDB3u924XF8NP9psNrxeb/ux6OivfgBGRUXhdrsZOnQoP/zhD3njjTe44YYbWLBgwVnPDSQ3N5fMzEwyMzPJzc29oMaJnI8S7ZYmPVB6sosmr58DRxrMLkWCQMAwd7lceDye9td+vx+73d7hMY/HQ3R0NFdffTVXXXUVADNmzGDr1q1nPTeQnJwcSkpKKCkpIScn5/xbJnKeSg7XE+W0MSAuwuxSRNplnFjzYGelJsFJYAHDfPz48RQUFABQVFTUPqkNYOzYsRQWFtLU1ER9fT2lpaVkZGTwb//2b3z44YcArF27ltGjR5/1XBEzVdY3sqvSTVZqHBaLZrJLzzE0yYXVAjtPjByJnEvACXAzZsxg9erVzJkzB8MwWLhwIXl5eaSmpjJ9+nSys7OZO3cuhmHw8MMPExYWxvz583niiSd46623iIiIYMGCBSQlJXV4roiZ2hbmmDI8yeRKRE4V7rAxKCGSfTUNHG/24YgI+ONaejGLoVUJpBd7ZGkR724o568/nMyo/jHt7/9x/X5afB1/a0Q4rXh9RofHz3VM1/aMa3tqXR0d+3h7Jcu3VXDXlamMT43jjomDOvxcES0aI72WYRgU7Kwm0RXGCC3jKj1Q214BGmqXQBTm0mttP1xPtbuJycMTtfKb9EgD4iOIcNjYWenW0q5yTgpz6bVW7awCYPLwRJMrEemY1WJhWLKLo8dbqKxvMrsc6cE0o0J6rbcLywE44mnmj+v3t78f7rCZVZLIGYYnu9hcfrR9cSORjijMpVdqbPGxu8pN35hwIpz2UyYd2W1aNlN6juEnVibcofvmcg4aZpde6fM9tXj9RvsEI5GeKjbCQXJ0GKVVbpq8PrPLkR5KYS69Utv98mFawlWCwPBkFy0+g3W7a80uRXoohbn0Sqt2VmO3WhjcJ8rsUkQCGnliDYQPiw+bXIn0VApz6XW2Hz7G9sP1DE2KwmHTt4D0fIP7RBEVZuOj4sPaRU06pJ9k0uss/mgHANcO0yNpEhysFgtj+sdS7W6msOyI2eVID6Qwl15l4/46/r61gglp8Vr1TYLKZQNiAfh/Ww6ZXIn0RApz6VV+9VEJAD+emald0iSopCdHER1u58Mth7UanJxBYS69xme7a1i1s5pJw/pwTXofs8sRuSB2q5UbRqZw8Ggjmw4cNbsc6WEU5tIrGIZxSq9cJBh9fUxfAP6mWe1yGoW59Ap/3nSIL/Ye4YaRyYxLjTe7HJGLMmV4EhEOG3/TULucRmEuIW9/bQM/fXczkU4bP71plNnliFy0CKeNaSOS2FPt0VrtcgqFuYQ0r8/Pj5ZsoL7Jyy9uGc2QRC0SI8Ht62P6AfDehnKTK5GeRGEuIe23/9jJl/vq+Obl/fnnCQPNLkfkks0clUJcpINl6/drrXZppzCXkLVudw25K3YxMD6CZ741Ro+iSUgId9iYPXEQtZ5m/rpZz5xLK4W5hKS6hmYeXlqE1WLhN3PGERPuMLskkU7z7avSsFggf22Z2aVID6H9zCXkGIbBY+9s4tDRRn48M4MJaZq9LsHPbrPw3oZyvD4/AJkp0Xy5r47/+/cdpCVG8a1xA0yuUMyknrmEnD98to8Piyu4emgCD143zOxyRDqN1+enxWfQ4jO4akgCAJ/uqm4PeOm9FOYSUnZU1PPLD7YSF+ng17OzsFl1n1xC0/CUaOIjHWw8UEdDs9fscsRkCnMJGV6fn/nLNtLk9fPvs8bSLzbC7JJEuozVYuGqIX1o8Rl8
sVc7qfV2CnMJGb9fW8bm8qPcPm4AM0f3NbsckS43MS0ep93KJyVVuJvUO+/NFOYSEg7WHWfxRyXERTr46U0jzS5HpFtEhtmZPDwRd5OXV1aWml2OmEhhLkHPMAyefL+YhmYfT3xjJH1cYWaXJNJtJg9LIibczn+t2kPFsUazyxGTKMwlqL23oZyf/6mY5dsqSE+KAsPgj+v3a6lL6TWcdiszR6dwvMXH8x/tMLscMUnAMPf7/Tz55JPMnj2b7OxsyspOXaRg2bJl3H777dx5552sWLECgIMHD3LfffeRnZ3N3Xffze7duwHIy8vjpptuIjs7m+zs7Pb3RS6Wp6mF/91Qjs1q4ZbLB+D1Q4vP0KM60qtMTEtgeLKLPxbup+RwvdnliAkChvny5ctpbm5m6dKlzJ8/n0WLFrUfq6qqIj8/nyVLlvDaa6/x/PPP09zczG9+8xvuvvtu8vPzmTdvHs8//zwAxcXFPPfcc+Tn55Ofn8/QoUO7rmXSK/xjWyXHGr1MGZ5IUrSG16V3slkt/H/fGIHfgAV/2artUXuhgCvAFRYWMnnyZACysrLYsmVL+7FNmzYxbtw4nE4nTqeT1NRUtm/fzmOPPUZ0dDQAPp+PsLDWH7LFxcW88sorVFVVcd111zFv3ryuaJP0Erur3BTsqCYuwsHUjORTjp2+Wtbpwh227ihRpNtMy0xm8vBEVu2s5v2ig9ymFeF6lYBh7na7cblc7a9tNhterxe73Y7b7W4PbYCoqCjcbjcJCa0rE+3evZvnnnuO3/3udwDcdNNNzJ07F5fLxUMPPcSKFSuYNm1aZ7dJegHDMPjFn7fiMwz+6bJ+OO1nDjK1rZbVEbtNw/ASOuw2C/9bdJBrhyWybncNT7y3mSMNzbjC7CeOW7Xca4gLOMzucrnweDztr/1+P3a7vcNjHo+nPdzXrVvHD37wA/793/+doUOHYhgG9957LwkJCTidTqZOncrWrVsDFpibm0tmZiaZmZnk5uZecAMlNC3fVsnKHVUMS3Yxpn+M2eWImM7r8xMd7mDmqL40NPt4b0N5+9KvmkMS+gKG+fjx4ykoKACgqKiIjIyM9mNjx46lsLCQpqYm6uvrKS0tJSMjg3Xr1vHMM8/w6quvctlllwGtPfybb74Zj8eDYRh89tlnjBkzJmCBOTk5lJSUUFJSQk5OzsW2U0JIQ7OXpz8oxm61cFtWf21tKnKSa9L7MCg+gk0HjrLt0DGzy5FuEnCYfcaMGaxevZo5c+ZgGAYLFy4kLy+P1NRUpk+fTnZ2NnPnzsUwDB5++GHCwsJYuHAhLS0tPP744wAMGTKEp59+mocffph77rkHp9PJNddcw9SpU7u8gRJ6/v1vJeyvPc4DU4aSEhN+1qF0kd7IarFw+/iBvPDxLt4vKmdIYhQOmzbIDHUWQ9MeJYis2VXN3Fc/Y1iyiw9yruXPGw92GOYRTiveE0OMHTnXcV0b2tf21Lo6+9p/bK/gH9squXJwAndMHMgdEwd1+LkSGrRojASN+sYWHn17EzarhcV3XK4Z6SLnMDUjiZSYMD7fW0tpldvscqSLKcwlaCz4YBvldcf5wXXpXD4ozuxyRHo0u9XK7eMGYgHeLjxAY4vP7JKkCynMJSj8cf1+lq7fz6h+MTx0/XCzyxEJCoMSIpk0LJFqdzO/Xq6lXkOZZkWI6c62uEu4w4bXb7D5QB3/vWYvEQ4bN13Wl/eLytuPi8i53TAyha2HjvFfBbu5+bL+XDYw1uySpAuoZy6ma1vc5fRfXr+fXZX15K8rw2a1cO81acRHhZ1yXETOzWm3cseEgfgN+Mk7m2jRM+chSWEuPdaho43krd6Lz29w1xWppPaJMrskkaA0LNnF7ImD2HboGK8UaIOrUKQwlx7p0NHjvLiilIZmH98aN4AR/bTKm8ileOKmkSRHh/Gb5TvZVanZ7aFGYS49zsG647z26R7cTV5mjR/AhLQEs0sSCXqxEQ5+edsYmn1+Hn9nE36/
lhgJJQpz6VHKj7QG+fFmH3OuGMjVQ/uYXZJIyLhxdF9uuqwf68uO8N9r9ppdjnQizWaXHmN/bQN5a/bQ1OJn1oSBXDW0D14t1SpyyU7eEnhCWhwrSipZ+NdtHG/x0j8uUjuqhQD1zKVHKKvx8Prq1iC/Y+IgxqfGm12SSEhpe2ok3GHn1qwBeP0GSz7fT5MWkwkJCnMx3e4qD3lr9tLi8zP7ikFkaXU3kS512YBYxg6MZf+R46zcUWV2OdIJFOZiqmXr9/PKqt14fX7mXJHK2IEKcpHucMvl/YkOt/NRcQVbD2qr1GCnMBdTtPj8/Pz9Lfzk7U04bVbuvWYwYwZoZSqR7hLptHP7uIH4DIOH3voSd5PX7JLkEmgCnHS505drdTd6yV9Xxu5qD/1iw7nnmjRiI5wmVijSO2X2jWbK8EQKdlbz2NubeGHuOCwWi9llyUVQmEuXa5t4A62Pnr3xWRlHj7cwun8Md1+dit1qPeseziLStb5xWT+Ot/j4y+ZDjF8dz/3XDjG7JLkIGmaXblO0/wgvF5Ry7HgLM0alMPfKVG2WImIym9XCC3PHk+gK49m/buOLvbVmlyQXQWEuXc4wDD4pqWTZ+gPYrBayr05jWmayhvNEeoiUmHBemDsOA5iXX8iuynqzS5ILpDCXLuX3G3yw6RAfba0gLsLBg9ela511kR7o6qF9eOa2MdR6mrn71c/ZX9tgdklyARTm0mW8Pj8/eWcTBTurSXKFMW9qOsnR4WaXJSJnMefKVP7tppEcPtbI3FfXcfhoo9klyXnSBDjpElsPHuMn72xkS/kxBsZHcO81g4kK0183kZ7m5KVeoXVDlhkjk/n7tkq++cKnPDBlKN+bPNTkKiUQ/XSV83L642Uns9us7Ws7H/E08/rqPbz0SSlev8Ht4wcwblAcNqsGgUR6qpOfOAG4LjOZJq9Bwc4qfv33HYzuF8PXhiWaWKEEojCX83L6N3tji4+DdceprG+i2t3Esi/2s7Oynmp3MwAD4iJ45ltjuC4zmT+u369Hz0SCiMVi4etj+pLocvJ+0UGyX/+cn39zFNlXp2niag+lMJcL4vX5Wb2rmhUlVTSf1FO3WGBQfCTTR8QxdmAc908egkvD6iJBbeLgBPrGhrP0i/08+X4xf99awYLbxpDWJ8rs0uQ0+mkr563k8DE+2HSIGk8zkU4bVw9NJCUmnP5x4Xx/6jAinHpmXCTUDEmM4k851/LEu5tZuaOKmb8uIOf6YXx38lCtE9GDKMwloCavj3c3lLO2tAarBb6W3ofpI1Law9thsyjIRUKU3Wbh8z213HRZXwbFR/D+xoP86qMdvLSylKkZyTx7+2XERjjMLrPXU5jLOZXXHedf/vAlG/fXkRITxuwrUukbo8fLRHoTr8+P1w+j+scyJNHFqp1VrNtTw183H2JlSSW3ZA3g1qz+XDk4AatV99TNoDCXDvn8Bu9+eYCFf93GkYYWxqfGccvlA3Daz5yVfvqjLSfTMJxIaIlw2pg5ui9TMpIoLDtCYdkR3vp8H299vo++MeHMHJ3C19L7cNWQPsRHaQOl7qIwlzMU7Khi4V+3sf1wPWF2KwtuG4PTZsHb8ZNpwJmz3dvYbee4SESCVrjDxnWZSfz2rnF8truG94sO8tcth/iftWX8z9oyLBbISI5mVP8YRvaLZlS/WEb2i6aPK8zs0kNSwDD3+/089dRTlJSU4HQ6WbBgAWlpae3Hly1bxpIlS7Db7Tz44INMmzaN2tpafvzjH9PY2EhycjLPPvssERERHZ4rnc/nNzh09Dj7ahvaV3CyWS3YrBZcYXZiIxzERTqxWuB4i4/GFj97qt2sK61l7e4a9tU2YLHArPEDmT8zg/5xEfxx/X5Aj5eJyFfsNgt/2ngQr8/PxMHxZKXGsr/2OLsq3ZRWudlT46Gkop73Nnx1TXJ02ImAj2FUv9b/pvWJxGHTWhSXImCYL1++nObmZpYuXUpRURGLFi3ipZdeAqCqqor8
/HzeeecdmpqamDt3LpMmTeLFF1/k5ptv5vbbb+eVV15h6dKl3HTTTR2e63RqGOZitfj8HKpr5MCRBnZWuik+eJTVu2o4fKwRn//igjfcYeWyAbFMH5nMgLgIVu+q1lC5iJzVqaNyFgbGRzIwPpLrMpMJc1iocbewr9bDobpGDh49zsG6Rj4pqeKTkqr2z7BaoH9cBKkJkSRFhxEb4SA2woErzE64w0a4w0q4w0aY3UaE00a43XrifRsRDhvhTiuRTjuRDluvvWcfMMwLCwuZPHkyAFlZWWzZsqX92KZNmxg3bhxOpxOn00lqairbt2+nsLCQefPmATBlyhSef/55Bg0a1OG5Y8eO7aKmndvft1awamcVhgEGxon/gtGegSfeO+04fPUe7dcYpx076b22c9uOneXrGYZBi984MdGk9b8+v0GLz2j9r7/1tddn0HLinKPHW84IbYfNQv/YcPq4wugT5SQu0oH1xCIPlw2Mpb7Ry9HjLdQ1tC7u0vYNkRwdRkOzj5SYsPbzT2a3WYGzrABntbY2tIPj5zqma3WtGdf21LpC8VqnzUZStJX4SAeXD/zqfU+TlxpPM+VHjnPgyHGq3U3UeJpZU1rT4edciDC7lagwOxEOG2F2K1arBZvF0vpfK1/9/qT/2qxtv28dxbSe9J7V0vr+6ee2/ZS0WKDtlc1q4Z8nDGTMgNhLbseFChjmbrcbl8vV/tpms+H1erHb7bjdbqKjo9uPRUVF4Xa7T3k/KiqK+vr6s54bSG5uLi+88AIADz30EDk5OeffunOYMSqFGaNSzuvrd9bX7Mlyc3P5YYi3szf8WfaGNkLvaGdvaCOcaOePQ7ud3fFnGfAmhcvlwuPxtL/2+/3Y7fYOj3k8HqKjo0953+PxEBMTc9ZzA8nJyaGkpISSkhJT/mK3/UMi1PWGdqqNoaM3tLM3tBF6Rzu7o40Bw3z8+PEUFBQAUFRUREZGRvuxsWPHUlhYSFNTE/X19ZSWlpKRkcH48eNZuXIlAAUFBUyYMOGs54qIiMilCTjMPmPGDFavXs2cOXMwDIOFCxeSl5dHamoq06dPJzs7m7lz52IYBg8//DBhYWE8+OCDPPbYYyxbtoz4+HgWL15MZGRkh+eKiIjIpQkY5larlaeffvqU99LT09t/f+edd3LnnXeecjwxMZHXXnvtjM/q6Nye7qGHHjK7hG7RG9qpNoaO3tDO3tBG6B3t7I42Wgzjq/nbIiIiEnz0lL6IiEiQU5iLiIgEOYW5iIhIkFOYi4iIBDmFuYiISJDTFqhAS0sLTzzxBOXl5TQ3N/Pggw8yffr09uN5eXm8/fbbJCQkAPCLX/yCoUOHmlXuJbntttvaV94bOHAgzz77bPuxUNnV7t133+W9994DoKmpiW3btrF69WpiYmIAWLBgAV9++SVRUVEAvPjii+e1GmFPsXHjRn71q1+Rn59PWVkZjz/+OBaLheHDh/Pzn/8cq/Wrf6M3Njby6KOPUlNTQ1RUFM8991z73+Oe7OQ2btu2jV/+8pfYbDacTifPPfcciYmJp5x/rr/XPdnJ7SwuLub73/8+gwcPBuCuu+7iG9/4Rvu5ofBn+fDDD1NdXQ1AeXk5l19+Ob/+9a/bzzUMgylTprT/P8jKymL+/PlmlH3eOsqPYcOGdf/3pSHG22+/bSxYsMAwDMOora01pk6desrx+fPnG5s3bzahss7V2Nho3HrrrR0eq6ysNG6++WajqanJOHbsWPvvg91TTz1lLFmy5JT35syZY9TU1JhU0aV55ZVXjJtvvtm44447DMMwjHnz5hnr1q0zDMMwfvaznxkfffTRKee//vrrxm9/+1vDMAzjgw8+MH75y192b8EX4fQ2fvvb3za2bt1qGIZhvPXWW8bChQtPOf9cf697stPbuWzZMuO111476/mh8GfZpq6uzrjllluMioqKU97fu3evMW/evO4s8ZJ1lB9mfF9qmB34+te/zo9+9KP21zbbqVt+FhcX88or
r3DXXXfx8ssvd3d5nWb79u0cP36c73znO9xzzz0UFRW1Hzt5B7zo6Oj2Xe2C2ebNm9m1axezZ89uf6pibGoAAASDSURBVM/v91NWVsaTTz7JnDlzePvtt02s8MKlpqaSm5vb/rq4uJgrr7wSaN2hcM2aNaecf/Kuh1OmTGHt2rXdV+xFOr2Nzz//PCNHjgTA5/OdsXLkuf5e92Snt3PLli188sknfPvb3+aJJ544YyOqUPizbJObm8vdd99NcnLyKe8XFxdTUVFBdnY23/ve99i9e3d3lXrROsoPM74vFea07uDmcrlwu9388Ic/5F//9V9POX7TTTfx1FNP8fvf/57CwkJWrFhhUqWXJjw8nPvvv5/XXnuNX/ziF/z4xz/G6/UCXPSudj3Zyy+/zA9+8INT3mtoaODuu+/mP/7jP3j11Vd58803g+ofLTfeeGP7RkfQOixpObFlbdsOhSfraAfDnu70Nrb9wP/yyy954403uO+++045/1x/r3uy09s5duxYfvKTn/CHP/yBQYMG8bvf/e6U80PhzxKgpqaGtWvXcvvtt59xflJSEg888AD5+fnMmzePRx99tLtKvWgd5YcZ35cK8xMOHTrEPffcw6233so3v/nN9vcNw+Dee+8lISEBp9PJ1Kn/f3t37JJqFIdx/Cv2BoJDgw0FDTaWUxhoQ3eKFqmliBoabGiTiCwiqkGHlraGqJYwkF4aain/g5AgKKixGiIQm8ISEvEOkmTa5d64XO+J5zPJe5ZzOOf48L7n5f394Pr6uoE9/Tqv18vQ0BAOhwOv10tLSwvZbBb4vAKeqZ6enri5uSEQCFRdd7lcTE5O4nK5cLvdBAIBo8L8o/fncG8VCt+rV8HQRMfHx6yurrK1tVVztvirdW2SgYEBfD5f5ffH/5nvMpepVIpQKFTzBBTA5/NV3lfy+/1kMhlKBnyk9GN+NGJfKsyBx8dHwuEw0WiUkZGRqrZcLkcoFOL5+ZlSqUQ6na5sONMcHBywtrYGQCaTIZfL0draCnxeAc9UZ2dn9PX11Vy/u7tjYmKCYrFIoVDg/Pyc7u7uBvTw7+jq6iKdTgPlCoV+v7+qvV4FQ9McHR2xt7dHIpGgo6Ojpv1X69okU1NTXF5eAnB6elqzLr/DXEJ5bP39/XXbNjY22N3dBcrHJ+3t7ZU73P9VvfxoxL7Ut9kpv918cnJS9Yb66Ogo+XyesbExDg8PSSQSNDc3EwwGiUQiDezt172+vrK4uMjDwwMOh4O5uTkuLi4qFfBs22Z/f59SqcT09DSDg4ON7vKX7ezs0NTUVHkk+77S3/b2NqlUCsuyGB4eZnx8vLGd/UP39/fMzs5i2za3t7csLy9TKBTo7OwkHo/jdDoJh8Nsbm5SLBZZWFggm81iWRbr6+tGBN3bGJPJJMFgkLa2tsrdS29vL5FIhPn5eWZmZvB4PDXruqenp8Ej+D3v5/Lq6opYLIZlWXg8HmKxGG63+9vMpW3bQPnYMplMVt2Nvo0xn88TjUZ5eXnB6XSysrJSVdjrf1QvP5aWlojH4/90XyrMRUREDKfH7CIiIoZTmIuIiBhOYS4iImI4hbmIiIjhFOYiIiKGU5iLiIgYTmEuIiJiOIW5iIiI4X4Cr6eDhEnuArgAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 576x396 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.distplot(bc_y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def bcback(bc_y,maxlog): #将变换后的数值进行还原\n",
    "    return np.power((bc_y*maxlog+1),1/maxlog)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x25f845ec788>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgQAAAFQCAYAAADA9WbqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XtglNWB///3XDLJZGYSEgURNCho0qLNksDaCxDLItou7C7FXiBbtF/7tZDa2CKhsbZa1CwXLe5uY7FSLfpLF0kq7W5rv9aWakkFddeUSINNXEGpgoVABDIDmdvz/P5IZiCYMAl5JjPA5/WPznPOzJwcL/lwrjbTNE1ERETkvGZPdQNEREQk9RQIRERERIFAREREFAhEREQEBQIRERFBgUBERERQIBAREREUCERERAQFAhEREUGBQERERFAgEBERERQIREREBAUCERERQYFAREREUCAQERERFAhEREQEBQIRERFBgUBERERQIBAREREUCERERAQFAhEREUGBQERERFAgSLoDR7t462Ag1c0QERE5LQWCJPv6xmbmr3sp1c0QERE5LQWCJOsIhDjQGcQ0zVQ3RUREpF8KBEkWihqYJgQjRqqbIiIi0i8FgiQL9QSBrnA0xS0RERHpnwJBkoWi3YHguAKBiIikMQWCJIuNEBwLKRCIiEj6UiBIslggOK5AICIiacyZqIJhGCxfvpy2tjZcLhc1NTWMGzcuXt7Q0MDGjRtxOp1UVFQwY8YMOjo6qKqqoquri1GjRrFy5Urcbnefdfft28ddd91FNBrFNE3uu+8+xo8fz/PPP88PfvADnE4nN954I5///Ofp6upi2bJlHDp0CI/Hw+rVq8nPz09qBw1VOKo1BCIichYwE3juuefM6upq0zRNc/v27ebixYvjZQcOHDDnzJljBoNB8+jRo/G/v//++81NmzaZpmmajz76qLl+/fp+637zm980f/vb35qmaZqNjY3mbbfdZoZCIfO6664zDx8+bAaDQXPevHnmgQMHzB//+Mfm97//fdM0TfOZZ54x77///kTNT6lo1DDHVT9jjqt+xtzSdiDVzREREelXwimDpqYmpk+fDsCkSZNoaWmJl+3YsYOSkhJcLhc+n4+CggJaW1t7vaesrIxt27b1W7e6upprr70WgGg0SmZmJrt27aKgoIDc3FxcLheTJ0/m1Vdf/cDnvvRSeh/4E1tQCFpUKCIi6S1hIPD7/Xi93vhrh8NBJBKJl/l8vniZx+PB7/f3eu7xeOjs7Oy3bn5+PhkZGezevZvVq1dz2223DepzE6mtraWoqIiioiJqa2sT1rfSyWcPaMpARETSWcI1BF6vl0DgxFn8hmHgdDr7LAsEAvh8vvjzrKwsAoEAOTk5/dYFePnll7n33nt54IEHGD9+PKFQ6LSfG3uWk5OT8AesrKyksrIyYb1kCJ88QqBFhSIiksYSjhCUlpbS2NgIQHNzM4WFhfGy4uJimpqaCAaDdHZ2smvXLgoLCyktLWXLli0ANDY2Mnny5H7rvvzyy/zLv/wLjz32GB/5yEcAmDBhAnv27OHw4cOEQiFeffVVSkpK+vzcdBY6aYRA2w5FRCSd2Uzz9Ifsx3YZvPHGG5imyYoVK2hsbKSgoICZM2fS0NBAfX09pmmyaNEibrjhBg4ePEh1dTWBQIC8vDzWrFlDdnZ2n3X/8R//kVAoxMiRIwG4/PLLue++++K7DEzT5MYbb+Sf//mfOX78ONXV1bS3t5ORkcGaNWvi70tHbx8M8Mnv/R6AZTcUcduMK1LbIBERkX4kDARy5v53fyez/rV7dKXy765g6fVFKW6RiIhI33QwURIFNWUgIiJniYSLCmVwfr59L5GexYR7Dp1YGLlz3xF+vn0vnykZm6qmiYiI9EuBwGKRqEE42j0LE4ycmI0Jho14UBAREUk3mjJIoohxIgCEFAZERCSNKRAkUTR6
YoQgrEAgIiJpTIEgiSLGiUAQimgzh4iIpC8FgiSKGhohEBGRs4MCQRJFFAhEROQsoUCQRFpUKCIiZwsFgiTqNWUQUSAQEZH0pUCQRL3XEGhRoYiIpC8FgiQ6dQ2Bro0QEZF0pUCQRLERAqfdhknvgCAiIpJOFAiSKNIzTeB2OQAIaR2BiIikKQWCJIr27DJwZ3QHAm09FBGRdKVAkESxKYJYINAIgYiIpCsFgiSKrSGITxlohEBERNKUAkESnTpCoK2HIiKSrhQIkigWCLK0qFBERNKcAkESRU9dQ6ApAxERSVMKBEmkXQYiInK2UCBIolPPIdB9BiIikq4UCJIoapjYbZDp7O5mTRmIiEi6UiBIoohh4rDbyHD0BIKIdhmIiEh6UiBIougpgUBrCEREJF0pECRRxDBx2u24NGUgIiJpzpmogmEYLF++nLa2NlwuFzU1NYwbNy5e3tDQwMaNG3E6nVRUVDBjxgw6Ojqoqqqiq6uLUaNGsXLlStxud591Y5544gkOHjxIVVUV7e3t3HHHHfGyP//5zyxdupT58+dTVlbGZZddBsCkSZNYunSphd1hrahh4LTbyHDYAJ1DICIi6SthINi8eTOhUIj6+nqam5tZtWoVjzzyCADt7e3U1dWxadMmgsEg5eXlTJ06lbVr1zJnzhzmzZvHunXrqK+vZ/bs2X3WNQyD73znO+zYsYPrr78egJEjR1JXVwfA9u3b+dd//Vc+//nP85e//IWrrrqKH/7wh0nsEutEDBOXw45LUwYiIpLmEk4ZNDU1MX36dKD7T+QtLS3xsh07dlBSUoLL5cLn81FQUEBra2uv95SVlbFt27Z+6waDQebOncvixYs/8N2maXL//fezfPlyHA4HO3fuZP/+/SxcuJBbb72V3bt3W9UPSRGJdq8hiAUCTRmIiEi6ShgI/H4/Xq83/trhcBCJROJlPp8vXubxePD7/b2eezweOjs7+62bm5vLtGnT+vzu559/niuvvJLx48cD3SMHX/nKV6irq2PRokUsW7Ys4Q9YW1tLUVERRUVF1NbWJqxvpahp4nTYyIitIdCUgYiIpKmEUwZer5dAIBB/bRgGTqezz7JAIIDP54s/z8rKIhAIkJOT02/d0/nFL37BTTfdFH999dVX43B0H/IzZcoU9u/fj2ma2Gy2fj+jsrKSysrKRD9mUkSjJg6bDae9u3263EhERNJVwhGC0tJSGhsbAWhubqawsDBeVlxcTFNTE8FgkM7OTnbt2kVhYSGlpaVs2bIFgMbGRiZPntxv3dPZuXMnpaWl8dcPP/wwTz75JACtra2MGTPmtGEglQzT7BkhsGOzdS8s1BoCERFJVwlHCGbNmsXWrVuZP38+pmmyYsUK1q9fT0FBATNnzmThwoWUl5djmiZLliwhMzOTiooKqquraWhoIC8vjzVr1pCdnd1n3f50dHTg8Xh6/cL/yle+wrJly9iyZQsOh4OVK1da0wtJYPRcbBQbHXA57JoyEBGRtGUzTVPj2Bb66avvEI6adIWj3PfM63xotI+bPn4ZDzzXCkDzPdenuIUiIiIfpIOJkiTSM0Lg6BkhyHDYdbmRiIikLQWCJIn2NWWgNQQiIpKmFAiSJBofIeju4gyHnXDURDM0IiKSjhQIkiTSMxoQHyFwdv+1K6xRAhERST8KBEkSX0PgOLGGAOB4OJqyNomIiPRHgSBJ4msIbCfWEIACgYiIpCcFgiTpd4QgFElZm0RERPqjQJAkp+4yiF2BfDykNQQiIpJ+FAiSJGrEFhV2d7HLqSkDERFJXwoESdLXwUSgQCAiIulJgSBJ+g0EWkMgIiJpSIEgSfo6qRA0QiAiIulJgSBJotFTRghiawi0qFBERNKQAkGSRGKLCntGBlyxXQYaIRARkTSkQJAkH9x2qDUEIiKSvhQIkuTURYXadigiIulMgSBJ+t9loDUEIiKSfhQIkqTfKYOwpgxERCT9KBAkSfQDIwSxo4s1ZSAiIulHgSBJ
Ijq6WEREziIKBEkSifY3ZaA1BCIikn4UCJIkesr1x067DRvadigiIulJgSBJTt1lYLPZcDntmjIQEZG0pECQJKfuMoDuaQMtKhQRkXSkQJAkJwLBiS7OcNjo0hoCERFJQwoESRLbZeA4aYTA5bRzTGsIREQkDSkQJEnEMLEBJ+UBXA6tIRARkfTkTFTBMAyWL19OW1sbLpeLmpoaxo0bFy9vaGhg48aNOJ1OKioqmDFjBh0dHVRVVdHV1cWoUaNYuXIlbre7z7oxTzzxBAcPHqSqqgqA9evX8/TTT5Ofnw/Avffey5gxY1i2bBmHDh3C4/GwevXqeHm6iRomDrsNm633GoKusIFhmNhPTgoiIiIplnCEYPPmzYRCIerr61m6dCmrVq2Kl7W3t1NXV8fGjRt5/PHHeeihhwiFQqxdu5Y5c+awYcMGJk6cSH19fb91u7q6qKqqYsOGDb2+d+fOnaxevZq6ujrq6uoYP348Tz31FIWFhWzYsIG5c+eydu1a63vEIrFAcLLY4URdEY0SiIhIekkYCJqampg+fToAkyZNoqWlJV62Y8cOSkpKcLlc+Hw+CgoKaG1t7fWesrIytm3b1m/dYDDI3LlzWbx4ca/v3blzJ+vWrWPBggU8+uijH2hLWVkZL730kjW9kASRqNlrhwF0TxmAji8WEZH0kzAQ+P1+vF5v/LXD4SASicTLfD5fvMzj8eD3+3s993g8dHZ29ls3NzeXadOmfeB7Z8+ezfLly3nyySdpamrihRde6PNzE6mtraWoqIiioiJqa2sT1rdKxDBwOnp3b4aOLxYRkTSVcA2B1+slEAjEXxuGgdPp7LMsEAjg8/niz7OysggEAuTk5PRbty+maXLzzTfHy6+99lpef/31Xp8R+9xEKisrqaysTFjPalHD/EAgyOwJBJ1d2mkgIiLpJeEIQWlpKY2NjQA0NzdTWFgYLysuLqapqYlgMEhnZye7du2isLCQ0tJStmzZAkBjYyOTJ0/ut25f/H4/c+bMIRAIYJomr7zyCldffXWfn5uuIn2sIch2OQA4cjyciiaJiIj0K+EIwaxZs9i6dSvz58/HNE1WrFjB+vXrKSgoYObMmSxcuJDy8nJM02TJkiVkZmZSUVFBdXU1DQ0N5OXlsWbNGrKzs/us2xefz8eSJUu46aabcLlcfPzjH+faa6/lmmuuobq6mgULFpCRkcGaNWss7xCrRI0PriGIBYLDxxQIREQkvdhM0zRT3YhzyU9ffYdw1OSe/2rh4twsKj55Rbxsx7uH2fg/77Bq3keYf01BClspIiLSmw4mSgLTNE87ZXBYUwYiIpJmFAiSIGp+8B4DALemDEREJE0pECRB9JSrj2NOLCoMDXubRERETkeBIAmi0f4CQfcaTo0QiIhIulEgSIJI7OpjR+9A4M7QlIGIiKQnBYIkiAeCU0YIHHYbvkwn7x/TlIGIiKQXBYIk6G8NAcAIT4YOJhIRkbSjQJAEEcMAwGH/YPeOcLs0ZSAiImlHgSAJov1MGQCMyM7geDhKly44EhGRNKJAkASnmzLIdWcAcFTTBiIikkYUCJKgv0WF0D1CADqtUERE0osCQRJEoqcJBG4XAO8HtNNARETShwJBEsSnDBx9LCrUCIGIiKQhBYIkOLHLoK8pg+4RgiPaaSAiImlEgSAJTrvLwB0bIdCUgYiIpA8FgiQY0KJCjRCIiEgaUSBIgtOeVKg1BCIikoYUCJLgdCMEuT27DA7rPgMREUkjCgRJEI2e5uhiTRmIiEgaUiBIgojZ/5RBhsOON9OpQCAiImlFgSAJoqc5mAi6jy/WjYciIpJOFAiSIL6GwNF3IBiRnaE1BCIiklYUCJLgdLsMoDsQBEJRQhFjOJslIiLSLwWCJIgkCgSxnQY6nEhERNKEAkESRHuOLnb2scsAILdnp4GOLxYRkXShQJAEpzuHACBPhxOJiEiacSaqYBgGy5cvp62tDZfLRU1NDePGjYuXNzQ0sHHjRpxO
JxUVFcyYMYOOjg6qqqro6upi1KhRrFy5Erfb3WfdmCeeeIKDBw9SVVUFwDPPPMOTTz6Jw+GgsLCQ5cuXY7fbmTt3Lj6fD4BLLrmElStXWt0nQ5ZwDUH8cCIFAhERSQ8JA8HmzZsJhULU19fT3NzMqlWreOSRRwBob2+nrq6OTZs2EQwGKS8vZ+rUqaxdu5Y5c+Ywb9481q1bR319PbNnz+6zrmEYfOc732HHjh1cf/31AHR1dfFv//Zv/PKXv8TtdnPHHXfwwgsvMG3aNADq6uqS2CVDF0m07TB+OJHWEIiISHpIOGXQ1NTE9OnTAZg0aRItLS3xsh07dlBSUoLL5cLn81FQUEBra2uv95SVlbFt27Z+6waDQebOncvixYvjn+tyudi4cSNutxuASCRCZmYmra2tHD9+nFtuuYWbbrqJ5uZmSzvDKvERgv62Hbp1WqGIiKSXhIHA7/fj9Xrjrx0OB5FIJF4WG74H8Hg8+P3+Xs89Hg+dnZ391s3NzY3/yT/eKLudCy+8EOgeDTh27BhTp04lKyuLL3/5yzz++OPce++9VFVVxdvSn9raWoqKiigqKqK2tjbRj2uJ+C4DW3/bDrXLQERE0kvCKQOv10sgEIi/NgwDp9PZZ1kgEMDn88WfZ2VlEQgEyMnJ6bdufwzD4MEHH+Stt96itrYWm83G5Zdfzrhx4+J/P2LECNrb27n44ov7/ZzKykoqKysT/ZiWihoGDrsNW7+BQCMEIiKSXhKOEJSWltLY2AhAc3MzhYWF8bLi4mKampoIBoN0dnaya9cuCgsLKS0tZcuWLQA0NjYyefLkfuv255577iEYDLJ27dr41MHTTz/NqlWrANi/fz9+v5+RI0ee+U+fJBHD7Hf9AOgKZBERST8JRwhmzZrF1q1bmT9/PqZpsmLFCtavX09BQQEzZ85k4cKFlJeXY5omS5YsITMzk4qKCqqrq2loaCAvL481a9aQnZ3dZ92+7Ny5k6effpopU6Zw8803A3DTTTfx2c9+lm9961ssWLAAm83GihUr4qMV6SRqmP3uMIDuuwxA5xCIiEj6sJlmz9V8YomfvvoOK59tJRI1uPPTH+5VluGw8bkplwIw8Z5fM36kh2cqp6eimSIiIr3oYKIkSDRCAN07Dd4PaIRARETSgwJBEkQME0c/xxbH5Ga7dAWyiIikDQWCJIgaxmkXFUL3CIE/GCEc1Y2HIiKSegoESRCJmjj7OZQoJrbTQKMEIiKSDhQIkiBqmP0eShQTP5xIOw1ERCQNKBBYzDBNTPq/2CjmxAiBTisUEZHUUyCwWKKbDmNi9xlop4GIiKQDBQKLDTQQ5PVMGXToxkMREUkDCgQWiwUCe4I1BPmenkAQUCAQEZHUUyCwWNQc2AjBBV4FAhERSR8KBBYzBjhlcIGn+x6Hg/5g0tskIiKSiAKBxQY6ZaARAhERSScKBBYb6KLCbJeDTKedQ34FAhERST0FAoudWENw+no2m40LvZkaIRARkbSgQGCx+AhBgikD6N5pcNAfRDdQi4hIqikQWMzouavInmDKALrXEQQjBsdC0SS3SkRE5PQUCCwWGeAaAjhxFoHWEYiISKopEFjMMAc+ZXBBLBAEtPVQRERSS4HAYgPdZQBwgbf7LAItLBQRkVRzproB55rTnUPgdNj4+fa9RKLdCw12t/sB+HXLX+kIhHA67HymZOzwNVZERKSHAoHFEh1dHIkahKPddbIyHAAc7Yr0PDOGpY0iIiKn0pSBxQYzZeBxdeexQDCS1DaJiIgkokBgscGcQ+DNVCAQEZH0oEBgsdgug4GcQ+DpCQR+BQIREUkxBQKLnZgySFzX5bST4bARCCkQiIhIaikQWOxEIBhY13oynQSCOqlQRERSK+FvLcMwuOeee/jCF77AwoUL2bNnT6/yhoYG5s2bx+c//3leeOEFADo6OrjlllsoLy/nG9/4BsePH++3bswTTzzB9773vfjr559/nhtvvJEvfOELNDQ0ANDV
1UVlZSXl5eXceuutdHR0DO2nT4ITawgGVt+b6SQQjOg+AxERSamEgWDz5s2EQiHq6+tZunQpq1atipe1t7dTV1fHxo0befzxx3nooYcIhUKsXbuWOXPmsGHDBiZOnEh9fX2/dbu6uqiqqmLDhg3xzw2Hw6xcuZIf//jH1NXVxd//1FNPUVhYyIYNG5g7dy5r165NTq8MQXQQawige6dBxDAJRrTlUEREUidhIGhqamL69OkATJo0iZaWlnjZjh07KCkpweVy4fP5KCgooLW1tdd7ysrK2LZtW791g8Egc+fOZfHixfHP3bVrFwUFBeTm5uJyuZg8eTKvvvrqBz73pZdesrQzrGAMYpcBnFhYqJ0GIiKSSgkDgd/vx+v1xl87HA4ikUi8zOfzxcs8Hg9+v7/Xc4/HQ2dnZ791c3NzmTZt2ge+c6Cfm25ilxsNeIQgs/twooBuPBQRkRRKGAi8Xi+BQCD+2jAMnE5nn2WBQACfz9freSAQICcnp9+6A/nO031uIrW1tRQVFVFUVERtbW3C+kMVGyFwDmLKADRCICIiqZUwEJSWltLY2AhAc3MzhYWF8bLi4mKampoIBoN0dnaya9cuCgsLKS0tZcuWLQA0NjYyefLkfuv2ZcKECezZs4fDhw8TCoV49dVXKSkp6fNzE6msrKStrY22tjYqKysT98gQxdcQDHDKQIcTiYhIOkh4l8GsWbPYunUr8+fPxzRNVqxYwfr16ykoKGDmzJksXLiQ8vJyTNNkyZIlZGZmUlFRQXV1NQ0NDeTl5bFmzRqys7P7rNuXjIwM7rzzTr785S9jmiY33ngjF110EQsWLKC6upoFCxaQkZHBmjVrLO+Qoeq5t2hARxfDiSkDHU4kIiKpZDO1381SC370Mi/tOsTtM69kdE5WrzK3y04kasYvNwJ49/1jrP39LqZOuIC5JWP53JRLh7vJIiIiOpjIarFzCJyD3WWgRYUiIpJCCgQWiw52l4EWFYqISBpQILCYMYjrj6H7PgOXw641BCIiklIKBBY7sctg4O/xZDo0QiAiIimlQGCx+BqCAV5uBCcuONL6ThERSRUFAoudWEMw8Pd4M51ETZMu3WcgIiIpokBgsegg7zIALSwUEZHUUyCwmDHI2w7hpMOJuhQIREQkNRQILBY1TGwM/OhiOHEWgT+kQCAiIqmhQGCxqGEOeMthTPxwIo0QiIhIiigQWCxqDj4QxC448gd1WqGIiKSGAoHFooY5qOkCOBEIjnaFk9EkERGRhBQILGacwZRBXrYLgPcDoWQ0SUREJCEFAotFziAQuF0OsjLsdBxTIBARkdRQILCYYQ78HoOT5WW76AiEdFqhiIikhAKBxc5kDQF0B4Jw1OSQpg1ERCQFFAgs1r3tcPDvy8vOAOCdjmMWt0hERCQxBQKLRQ1zUMcWx+R5uhcWvvv+caubJCIikpACgcXO5BwCOLHT4J33NUIgIiLDT4HAYoZhDuoeg5jYCME7HRohEBGR4adAYKGoYWIyuJsOY2JrCN7VCIGIiKSAAoGFwlEDOLNth5lOBx6XQ2sIREQkJRQILDSUQACQ73Gx9/3jGIbOIhARkeGlQGChSLT7F/mZnEMA3YEgFDU40Bm0slkiIiIJKRBYKGwMbYRAOw1ERCRVFAgsFBshGMqUAehwIhERGX4KBBaKrSEYypQB6HAiEREZfs5EFQzDYPny5bS1teFyuaipqWHcuHHx8oaGBjZu3IjT6aSiooIZM2bQ0dFBVVUVXV1djBo1ipUrV+J2uwdc9+2332bFihXx72hubuYHP/gBxcXF3HDDDRQWFgJw3XXXcfPNNyehW85MeIgjBHkeHV8sIiKpkTAQbN68mVAoRH19Pc3NzaxatYpHHnkEgPb2durq6ti0aRPBYJDy8nKmTp3K2rVrmTNnDvPmzWPdunXU19cze/bsAdf90pe+RF1dHQDPPvsso0aNoqysjG3btjFnzhzuvvvu5PbKGYrE1xCc2fu1hkBE
RFIl4a+upqYmpk+fDsCkSZNoaWmJl+3YsYOSkhJcLhc+n4+CggJaW1t7vSf2i3wwdWOOHTtGbW0t3/72twFoaWlh586dfPGLX+T222/nwIED1vWEBeJrCM5wyiDDYeeinExNGYiIyLBLGAj8fj9erzf+2uFwEIlE4mU+ny9e5vF48Pv9vZ57PB46OzsHVTfm6aef5lOf+hT5+fkAjB8/nttvv52f/OQnXHfdddTU1CT8AWtraykqKqKoqIja2tqE9YcivobgDKcMAC7Jy+a9I11Eej5LRERkOCQMBF6vl0AgEH9tGAZOp7PPskAggM/n6/U8EAiQk5MzqLoxv/zlL/nc5z4Xf/2xj32Mj370owDMmjWL119/PeEPWFlZSVtbG21tbVRWViasPxRDXUMAcGmem6hh8t6RLquaJSIiklDCQFBaWkpjYyPQvbgvtqAPoLi4mKamJoLBIJ2dnezatYvCwkJKS0vZsmULAI2NjUyePHlQdQE6OzsJhUJcfPHF8e/7zne+w3PPPQfASy+9xFVXXWVRN1gj9qf6M50yALg0PxvQOgIRERleCRcVzpo1i61btzJ//nxM02TFihWsX7+egoICZs6cycKFCykvL8c0TZYsWUJmZiYVFRVUV1fT0NBAXl4ea9asITs7e8B1Ad566y3Gjh3bqy1Lly7lrrvu4qmnnsLtdg9oymA4hY2hjxBckucG4N2O4zDBkmaJiIgkZDNNUwfnW+R3f97Pl598lU9dNZqywpEfKHe77ESiZnxq4VQZDhtjR7gpf+wVKv/uCpZeX5TsJouIiAA6mMhSQ73cCE5MGWingYiIDCcFAgtZsahwdG4WTruNPYcCiSuLiIhYRIHAQvGDiYawqDDDYacgP5td7QE0myMiIsNFgcBCsRGCoZxDADB+pIcjx8N0BEJWNEtERCQhBQILnVhDMLTPmTCy+yCo3Qc1bSAiIsNDgcBCJ64/Hlq3jh/pAWB3u3/IbRIRERkIBQILxUcIhjZjwPieEYJd7RohEBGR4aFAYKGIYc0agviUgUYIRERkmCgQWMiKo4sB8j0uRmRnsFsjBCIiMkwUCCwUsuAcgpjxF3rY03GMUES3HoqISPIpEFgoYsFJhTETRnqJGiZ/6dAlRyK9ydreAAAZjUlEQVQiknwKBBaKryEY4pQBnFhYqHUEIiIyHBQILGTFXQYxsa2H2mkgIiLDQYHAQrFAMNRdBqCdBiIiMrwUCCwUO5jIacGUQUF+Ng67TacViojIsFAgsJBVdxkAuJzdlxxphEBERIaDM9UNOJfEbzs8w0DgdNj4+fa98d0K7gw7bx0L88TWt/BkOnE67HymZKxl7RUREYnRCIGFYlMGQxkgiEQNwlGTcNTkAk8mAO8d6SIcNeNBQURExGoKBBYK9fzCdg7xcqOYC33dgaC9M2jJ54mIiPRHgcBCkfguA2s+70JvTyDwKxCIiEhyKRBYKHYw0VDvMogZ2TNCcFAjBCIikmQKBBay8hwCAI/LgTvDoRECERFJOgUCC4WjJjasOboYwGazcUmem4P+EAeOdlnymSIiIn1RILBQJGpYcmzxyaZclg/Af7/dYennioiInEyBwELhqGl5IJh4cQ6+TCd//Mv7ugpZRESSRoHAQhHD+hECh93G5Mvy6AobvPbuYUs/W0REJCbhSYWGYbB8+XLa2tpwuVzU1NQwbty4eHlDQwMbN27E6XRSUVHBjBkz6OjooKqqiq6uLkaNGsXKlStxu92DqltTU8Mf//hHPJ7uW//Wrl1LOBzus266CEdNy9YPnOxvL8tnS1s7L+/WtIGIiCRHwhGCzZs3EwqFqK+vZ+nSpaxatSpe1t7eTl1dHRs3buTxxx/noYceIhQKsXbtWubMmcOGDRuYOHEi9fX1g6oLsHPnTh577DHq6uqoq6vD5/P1WzddhKMGTotHCADysl0UXuTjLx3H2LnviOWfLyIikjAQNDU1MX36dAAmTZpES0tLvGzHjh2UlJTg
crnw+XwUFBTQ2tra6z1lZWVs27ZtUHUNw2DPnj3cc889zJ8/n6effvoDbYnVTSeRqGnZlsNTffTy7sWFG175S1I+X0REzm8JA4Hf78fr9cZfOxwOIpFIvMzn88XLPB4Pfr+/13OPx0NnZ+eg6h47dowvfvGLPPjggzz22GNs2LCB1tbWPusmUltbS1FREUVFRdTW1g6kT85YMtYQxBSO9jHCncF/bt9LIBhJyneIiMj5K2Eg8Hq9BAKB+GvDMHA6nX2WBQIBfD5fr+eBQICcnJxB1XW73dx000243W68Xi8f+9jHaG1t7bNuIpWVlbS1tdHW1kZlZeVA+uSMhaOmZacUnspuszHlsjwCoSi/az2QlO8QEZHzV8JAUFpaSmNjIwDNzc0UFhbGy4qLi2lqaiIYDNLZ2cmuXbsoLCyktLSULVu2ANDY2MjkyZMHVfftt9+mvLycaDRKOBzmj3/8I1dddVWfddNJOGrgSOK+jb+5ZAQAv9qxL3lfIiIi5yWbaZrm6SrEdhm88cYbmKbJihUraGxspKCggJkzZ9LQ0EB9fT2mabJo0SJuuOEGDh48SHV1NYFAgLy8PNasWUN2dvag6v7oRz/i17/+NRkZGfzTP/0TCxYs6Lduuij89rOMzs1i8bUT+ix3u+xEeq42PpPyDIeNH/1hN28fOsYf756FNzPhJhEREZEBSRgIZOAu/9avuCw/m1vLkhcI9h3u4l83v8G/z5/EP00aa1nbRUTk/KaDiSwSNUxM07qLjfozu3g0AM/seC+p3yMiIucXBQKLxG46TNYug5grRvn40GgfW9ra6ewKJ/W7RETk/KFAYJHhCgQAsz9yMaGoweY/70/6d4mIyPlBgcAikZ55/2RtOzzZ3xdfDMCvNG0gIiIWUSCwSNgYvhGCCSO9fGi0j8Y3DnLkuKYNRERk6BQILBIbIUj2osKYOcXd0wa/07SBiIhYQIHAIvE1BMMwZQDwqau7dxv89nUFAhERGToFAovEzg4YjikD6J42uPxCD1veaKcrHB2W7xQRkXOXAoFFIsO4hgDAZrNx/cSLOBaKsm3XwWH5ThEROXfp7FuLDMcuA6fDxs+37yXSMz3h7AkfP9yym0P+EE6Hnc+U6PRCEREZPI0QWCTU80s62YsKI1GDcM/xxhePcOPNdLJz31GCESMeFERERAZLgcAisREC5zBNGUD3lcgfGu0jEIzwTsexYfteERE59ygQWCQyTCMEp5o4JgeA1987OqzfKyIi5xYFAouEjeE7qfBkE0Z6cTnsvL7vKLq4UkREzpQCgUUi8bsMhvd7Mxx2Ci/ycigQ4kBncHi/XEREzhkKBBY5cbnR8Hfphy/unjbY+uahYf9uERE5NygQWOTEwUTD/91XjcllpC+Tl3Yf4j+37x3+BoiIyFlPgcAi8YOJhnkNAYDLaeeLHx1HltPOnT/bQcveI8PeBhERObspEFgkPMyXG51qpC+TBdcU0BU2WFTXREcglJJ2iIjI2UmBwCIn1hCkJhBA9xbEb1x3JXsPH+feX+5MWTtEROTso0BgkcgwX27Un9v/7kquHOXl2T/9lcPHNEogIiIDo0BgkeG+/rg/druNGydfQihq8MyO91LaFhEROXsoEFgkYqTHCAHAZ0rGYrfBpj++m+qmiIjIWUKBwCLhSM/RxSkeIQC4KCeLqVdcyPa/HGZ3uz/VzRERkbOAAoFFYkcXD+flRqfz2cmXAPCzP+pcAhERScyZ6gacK1J1udHJnA4bP9++l0jUIBQxyHTa+ckre7g0343dZsPpsPOZkrEpa5+IiKQvjRBYJF3WEESiBuGoic1m4+qxuRw+FuaN/X7CUTMeWkRERE6VcITAMAyWL19OW1sbLpeLmpoaxo0bFy9vaGhg48aNOJ1OKioqmDFjBh0dHVRVVdHV1cWoUaNYuXIlbrd7UHWfeOIJfvWrXwFw7bXX8rWvfQ3TNCkrK+Oyyy4DYNKkSSxdujQ5PTNI6bLL4GSlBXk0
7Xmf7X85zISR3lQ3R0RE0ljCQLB582ZCoRD19fU0NzezatUqHnnkEQDa29upq6tj06ZNBINBysvLmTp1KmvXrmXOnDnMmzePdevWUV9fz+zZswdcd+bMmfziF7/gpz/9KTabjfLycq677jrcbjdXXXUVP/zhD5PeMYOVDgcTnWrcBdnke1y89u5hpl5xAQX52alukoiIpKmEUwZNTU1Mnz4d6P4TeUtLS7xsx44dlJSU4HK58Pl8FBQU0Nra2us9ZWVlbNu2bVB1R48ezWOPPYbD4cButxOJRMjMzGTnzp3s37+fhQsXcuutt7J79+5k9MkZSZeDiU5mt9mYU3wxUcPkp6++qykDERHpV8JA4Pf78XpPDDc7HA4ikUi8zOfzxcs8Hg9+v7/Xc4/HQ2dn56DqZmRkkJ+fj2marF69mokTJ3L55ZczcuRIvvKVr1BXV8eiRYtYtmxZwh+wtraWoqIiioqKqK2tHWC3DF44DQMBwIdG5/C3l+Xx16Nd/Pb1/alujoiIpKmEUwZer5dAIBB/bRgGTqezz7JAIIDP54s/z8rKIhAIkJOTM6i6AMFgkLvuuguPx8N3v/tdAK6++mocDgcAU6ZMYf/+/Zhm9wK6/lRWVlJZWTmYPjkjsdsO0ywPAPD3V1/Mmwf8vNDWTtOeDiaPy091k0REJM0kHCEoLS2lsbERgObmZgoLC+NlxcXFNDU1EQwG6ezsZNeuXRQWFlJaWsqWLVsAaGxsZPLkyYOqa5omX/3qVykqKuK+++6Lh4CHH36YJ598EoDW1lbGjBlz2jAwnNJxDUFMZoaDz06+FIA7Gl4jEIykuEUiIpJubKZpmqerENtl8MYbb2CaJitWrKCxsZGCggJmzpxJQ0MD9fX1mKbJokWLuOGGGzh48CDV1dUEAgHy8vJYs2YN2dnZA667detW7rjjDiZNmhRvxx133MH48eNZtmwZx44dw+FwcM899zBhwoSkd9JA3Pr/vcpvX9/Pvf84kYyeAHMqt8tOJGrGpxeGu/y5nX9lyxvtfPFjBdTM/cgAfioRETlfJAwEMjD/Z/1/80JbOzVzr8Ju63vgJdWBwIbJE9v20La/kydvuYZrC0cO4CcTEZHzgQ4mskj8YKI0mcLoi9Nh56Ev/A0ZDhvffPo1XY8sIiJxCgQWCafB0cUDcdWYXL5xXSH7jwa5+792pro5IiKSJhQILBKOmtht6XHbYSKLysZTWjCCX762j1+8ti/VzRERkTSgQGCRSNQgw3F2dKfTYeehz0/CneHg7v9sYf/RrlQ3SUREUuzs+A12FghHzbMmEABcdqGHb8/+MEeOh/nm0zvQ2lIRkfObrj+2SMQwcDrSe7rg5OuRAVwOG4UXednyRjtLf/oa068cqeuRRUTOUwoEFglHTZz29B8hiF2PHPOZkkv4/u/+l1++to93O47xzGv76DgWYl7JWBZ+/LLUNVRERIaVAoFFwlEDV5qPEPQl153BP00aw8b/eYf/fvv9+PPX3jnMhFFePjHhwhS2TkREhosCgUUiUROXM/1HCPpSfMkIRudkkeG08cWPXcabBzr5wqMvc/tTzfy/r09jlC8r1U0UEZEkOzt/g6Whs2ENwemMyslilC+LXHcGk8flc+enP8RBf5Dbn9pO1NCCQxGRc51GCCwSihhknAVrCE7n5EWHOVlOrhqTw8u7O7jxkW1cNSaHS/Oz+b/TLsd5Fu2mEBGRgVEgsEjEMM/qEYKYkxcdziu5hL8eeZPmdw7T/M5hAB7dsouvlE3g5k+MI9ulf31ERM4V+j+6RSJn2TkEA+F2Oaj8uyt55/1j/PVIF/uPdvHG/k5W/7qVx1/czVc/eQU3f+KytLzyWUREBkeBwCJhwyDjHBghOJXLaWfCSC8TRnrJcNi4/qrRPP6H3fx469vc98zr/GnvER78bLGmEUREznL6v7gFooaJaXJWnEMwVLnuDO64vojGb86gtGAEP9++lyUNr8UPOxIRkbOTRggsELvp
8FxYQ3A6p550+JmSsbwfCPHL1/ax52CABR8tYME1BSlupYiInAkFAguE40cBn/sjBCcvOnTY7dz0ict4ctseduw9QsvP/8TG/3mHj4+/gHmlYym8yJfi1oqIyEApEFgg0vML8lwfIehLptPBlz5xGX/433bebPezc+8RXnvnMD/6w26++NEClswqZES2K9XNFBGRBBQILBA2YlMG5/4IQV9cTjszP3wRn7p6NH//kYtpfKOdB59r48mX9vBfr+1j6axCFlxTcN72j4jI2UD/h7ZAbAg94zzffud02PjN6/vxByPcWnY5c4ovpisU5e7/2sm01S/wwK9bU91EERHph0YILBCJnt8jBCc7scbAxicmXMhHxuby29f307Tnfdb+fhd/fu8os4vH8MmikVzozUx1c0VEpIcCgQXiIwQKBB/gy8pgXuklfPTyC/h/f9rHC23tvNDWjs0Gf3PJCD4x4QI+Ov4CpozLw5Opfx1FRFJF/we2QKRnDcG5eDCRVcbmuamceQUHOkP86d0j/Pm9o+x4t/tI5LW/34XdBh+fcCE/KC/RIkQRkRRQILDAM6+9B8BFObom+HRsNhsXeFxMveJCpl5xIcFwlD0dx3jrYICWvUfY+uZBZnzv91TdUMRnSsbqrgQRkWFkM01Td9sOwZ/ePcLctVsZnZPFc0vKePZP78WnEE7ldtmJRE2V91EeMQxe2d3B79sOEAhFAfBmOhnpy2TMiCzGXeDh8gs8jM7Nwp3hwO1y4Ml0MmZEFiO9mdhsGp0RERkK/RFsCEIRg6qfvkbUMHngs8V4NQd+xpx2O9dNHMXfXp7P717fz/7OLjq7IrR3BnnrYICtbx7q970up50xuVlkOh3YbN27HS7Ny+bqsblMHJNDQX42OVkZ5LidZDodw/hTiYicPRL+BjMMg+XLl9PW1obL5aKmpoZx48bFyxsaGti4cSNOp5OKigpmzJhBR0cHVVVVdHV1MWrUKFauXInb7U5a3VR5+Pn/pW1/J+UfLWDqFRemrB3nEo/Lwac/cnGvZ6GIQSAU5qA/xPuBEOGoSThq0BWOcvhYmI5jIQ4FQkQNE8MwiRgmLXuP8mzLXz/w+b5MJ5fmZzPugmzGjHCTlWHH5XDgdtkZO6L7ecEF2QAcPR7m6PEI2S4HY0a4cTm1aFREzl0Jpwx+85vf8Pzzz7Nq1Sqam5t59NFHeeSRRwBob2/nlltuYdOmTQSDQcrLy9m0aRMPPPAAEydOZN68eaxbtw6Xy8Xs2bOTUvdLX/rScPTTB7x5wM8N/9bI6Jwsfv2N6fiyMgD46avvpOWQ/NlePpj3mqbJkeNh9h3uYt+R43R2hQlFDY6HDI4eD3MoEOz3c/pjt8HFuW4uzXdTkJ9NQX42F3ozOR6O4u+KcDwcxeW0k+1y4HY5cWc4ev7e0T3F0TPNAXAsFOVYMELUNBnly2J0ThY5bueQpj1M09S0ichZKBQxiBhGWqyZStiCpqYmpk+fDsCkSZNoaWmJl+3YsYOSkhJcLhcul4uCggJaW1tpampi0aJFAJSVlfHQQw9x6aWXJqVuqgJBVzjK2BFuVt9YHA8DEDuLoO+b/7pvQzRVfgblg3uvjZG+TEb6MvmbS3MByMpwEDFMIlED0zTpDEY4cix84hnQ4Q+xvzPIoUAQu80W/0XeFYnSEQgRCEZ5eXcHL+/u6LMNQ+Fy2sly2nE5HWSeMhLRV2a32WxEDZNjoe4wEo6a5HtcjPRmcoHXRaSn7Fgwigk47TacDjsuR/dfnXYbGQ47TocNp92Gw27DNMEwu7/PBAzT7HnW/f3GSe2wYetpR+82dZf1ft6r3XTfgeGwd/8zs9u7v99us/X5nlMf9V3ngw/7rNdnm2wJ65ztMSvVObGvfz7D+v0p/HrDNOnsinD4WJijXWGynA5y3RnkujN4/1iIN9v97Dl0jKhhMiY3iysu8jFlXB63zbgCRwoOuksYCPx+P16vN/7a4XAQiURwOp34/X58vhMX
2Hg8Hvx+f6/nHo+Hzs7OpNVNpLa2locffhiAr33ta1RWViZ8z0BcPTaXxm/O+MDzd1/8mWXfcb6pra1V3w2B+u/Mqe/OnPpuaNKp/xJOinq9XgKBQPy1YRg4nc4+ywKBAD6fr9fzQCBATk5O0uomUllZSVtbG21tbcPS6bHwIYOnvhsa9d+ZU9+dOfXd0KRT/yUMBKWlpTQ2NgLQ3NxMYWFhvKy4uJimpiaCwSCdnZ3s2rWLwsJCSktL2bJlCwCNjY1Mnjw5aXVFRERk6BJOGcyaNYutW7cyf/58TNNkxYoVrF+/noKCAmbOnMnChQspLy/HNE2WLFlCZmYmFRUVVFdX09DQQF5eHmvWrCE7OzspdUVERGTodDCRxdJpPuhso74bGvXfmVPfnTn13dCkU/8pEIiIiEjiNQQiIiJy7lMgEBEREQUCERERUSAQERERFAhEREQEXX9smUS3Qp5vwuEwd911F3v37iUUClFRUcEVV1zBnXfeic1m48orr+S73/0udrudhx9+mN///vc4nU7uuusuiouL2bNnz4DrnqsOHTrEvHnz+PGPf4zT6VTfDcKjjz7K888/TzgcZsGCBVxzzTXqvwEIh8Pceeed7N27F7vdzv33369/9wbgtdde43vf+x51dXWD6gMr6lrKFEs899xzZnV1tWmaprl9+3Zz8eLFKW5Raj399NNmTU2NaZqm2dHRYV577bXmokWLzJdfftk0TdO8++67zd/85jdmS0uLuXDhQtMwDHPv3r3mvHnzTNM0B1X3XBQKhcyvfvWr5vXXX2+++eab6rtBePnll81FixaZ0WjU9Pv95ve//3313wD99re/NW+//XbTNE3zxRdfNL/2ta+p7xJYt26dOWfOHPNzn/ucaZqD64Oh1rWapgwscrpbIc9Hn/rUp/j6178ef+1wONi5cyfXXHMN0H1b5bZt22hqamLatGnYbDbGjBlDNBqlo6NjUHXPRatXr2b+/PmMGjUKQH03CC+++CKFhYXcdtttLF68mE9+8pPqvwG6/PLLiUajGIaB3+/H6XSq7xIoKCigtrY2/jpZ/dVXXaspEFikv1shz1cejwev14vf7+f222/nG9/4BqZpxq/IPflmy5P7LfZ8MHXPNT/72c/Iz8+PB0xAfTcI77//Pi0tLfz7v/879957L1VVVeq/AcrOzmbv3r18+tOf5u6772bhwoXquwRuuOGG+IV/kLz/VvuqazWtIbDI6W6FPF+999573HbbbZSXl/MP//APPPjgg/GyRDdbnjw3lqjuuWbTpk3YbDZeeukl/vznP1NdXd3rT1Tqu9MbMWIE48ePx+VyMX78eDIzM/nrX/8aL1f/9e+JJ55g2rRpLF26lPfee4+bb76ZcDgcL1ffJTaYPhhqXcvbbvknnqdOdyvk+ejgwYPccsstLFu2jM9+9rMATJw4kVdeeQXovq1yypQplJaW8uKLL2IYBvv27cMwDPLz8wdV91zzH//xH/zkJz+hrq6OD3/4w6xevZqysjL13QBNnjyZP/zhD5imyf79+zl+/Dgf//jH1X8DkJOTE/9lnZubSyQS0X+3g5Ss/uqrrtV0l4FFYrsM3njjjfitkBMmTEh1s1KmpqaGZ599lvHjx8efffvb36ampoZwOMz48eOpqanB4XBQW1tLY2MjhmHwrW99iylTpvDWW29x9913D6juuWzhwoUsX74cu90+4P5Q38EDDzzAK6+8Er8t9ZJLLlH/DUAgEOCuu+6ivb2dcDjMTTfdxNVXX62+S+Ddd9/ljjvuoKGhYVB9YEVdKykQiIiIiKYMRERERIFAREREUCAQERERFAhEREQEBQIRERFBgUBERERQIBAREREUCERERAT4/wHIRg2Kfzt0UgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 576x396 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.distplot(bcback(bc_y_train,maxlog))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.1.5 Cat_feature encoding"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "类别特征进行标签化,化为从0开始的int数值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SaleID</th>\n",
       "      <th>name</th>\n",
       "      <th>regDate</th>\n",
       "      <th>model</th>\n",
       "      <th>brand</th>\n",
       "      <th>bodyType</th>\n",
       "      <th>fuelType</th>\n",
       "      <th>gearbox</th>\n",
       "      <th>power</th>\n",
       "      <th>kilometer</th>\n",
       "      <th>...</th>\n",
       "      <th>v_5</th>\n",
       "      <th>v_6</th>\n",
       "      <th>v_7</th>\n",
       "      <th>v_8</th>\n",
       "      <th>v_9</th>\n",
       "      <th>v_10</th>\n",
       "      <th>v_11</th>\n",
       "      <th>v_12</th>\n",
       "      <th>v_13</th>\n",
       "      <th>v_14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>736</td>\n",
       "      <td>20040402</td>\n",
       "      <td>30.0</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>60</td>\n",
       "      <td>12.5</td>\n",
       "      <td>...</td>\n",
       "      <td>0.235718</td>\n",
       "      <td>0.101990</td>\n",
       "      <td>0.129517</td>\n",
       "      <td>0.022812</td>\n",
       "      <td>0.097473</td>\n",
       "      <td>-2.880859</td>\n",
       "      <td>2.804688</td>\n",
       "      <td>-2.419922</td>\n",
       "      <td>0.795410</td>\n",
       "      <td>0.914551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2262</td>\n",
       "      <td>20030301</td>\n",
       "      <td>40.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.264893</td>\n",
       "      <td>0.121033</td>\n",
       "      <td>0.135742</td>\n",
       "      <td>0.026596</td>\n",
       "      <td>0.020584</td>\n",
       "      <td>-4.902344</td>\n",
       "      <td>2.095703</td>\n",
       "      <td>-1.030273</td>\n",
       "      <td>-1.722656</td>\n",
       "      <td>0.245483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>14874</td>\n",
       "      <td>20040403</td>\n",
       "      <td>115.0</td>\n",
       "      <td>15</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>163</td>\n",
       "      <td>12.5</td>\n",
       "      <td>...</td>\n",
       "      <td>0.251465</td>\n",
       "      <td>0.114929</td>\n",
       "      <td>0.165161</td>\n",
       "      <td>0.062164</td>\n",
       "      <td>0.027069</td>\n",
       "      <td>-4.847656</td>\n",
       "      <td>1.803711</td>\n",
       "      <td>1.565430</td>\n",
       "      <td>-0.832520</td>\n",
       "      <td>-0.229980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>71865</td>\n",
       "      <td>19960908</td>\n",
       "      <td>109.0</td>\n",
       "      <td>10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>193</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.274414</td>\n",
       "      <td>0.110291</td>\n",
       "      <td>0.121948</td>\n",
       "      <td>0.033386</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-4.507812</td>\n",
       "      <td>1.286133</td>\n",
       "      <td>-0.501953</td>\n",
       "      <td>-2.437500</td>\n",
       "      <td>-0.478760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>137642</td>\n",
       "      <td>20090602</td>\n",
       "      <td>24.0</td>\n",
       "      <td>10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>109</td>\n",
       "      <td>10.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.260254</td>\n",
       "      <td>0.000518</td>\n",
       "      <td>0.119812</td>\n",
       "      <td>0.090942</td>\n",
       "      <td>0.048767</td>\n",
       "      <td>1.885742</td>\n",
       "      <td>-2.722656</td>\n",
       "      <td>2.457031</td>\n",
       "      <td>-0.286865</td>\n",
       "      <td>0.206543</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49995</th>\n",
       "      <td>249995</td>\n",
       "      <td>111443</td>\n",
       "      <td>20041005</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>150</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.263672</td>\n",
       "      <td>0.000292</td>\n",
       "      <td>0.141846</td>\n",
       "      <td>0.076416</td>\n",
       "      <td>0.039276</td>\n",
       "      <td>2.072266</td>\n",
       "      <td>-2.531250</td>\n",
       "      <td>1.716797</td>\n",
       "      <td>-1.063477</td>\n",
       "      <td>0.326660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49996</th>\n",
       "      <td>249996</td>\n",
       "      <td>152834</td>\n",
       "      <td>20130409</td>\n",
       "      <td>65.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>179</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.255371</td>\n",
       "      <td>0.000991</td>\n",
       "      <td>0.155884</td>\n",
       "      <td>0.108398</td>\n",
       "      <td>0.067871</td>\n",
       "      <td>1.358398</td>\n",
       "      <td>-3.291016</td>\n",
       "      <td>4.269531</td>\n",
       "      <td>0.140503</td>\n",
       "      <td>0.556152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49997</th>\n",
       "      <td>249997</td>\n",
       "      <td>132531</td>\n",
       "      <td>20041211</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>147</td>\n",
       "      <td>12.5</td>\n",
       "      <td>...</td>\n",
       "      <td>0.262939</td>\n",
       "      <td>0.000318</td>\n",
       "      <td>0.141846</td>\n",
       "      <td>0.071960</td>\n",
       "      <td>0.042969</td>\n",
       "      <td>2.166016</td>\n",
       "      <td>-2.417969</td>\n",
       "      <td>1.371094</td>\n",
       "      <td>-1.073242</td>\n",
       "      <td>0.270508</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49998</th>\n",
       "      <td>249998</td>\n",
       "      <td>143405</td>\n",
       "      <td>20020702</td>\n",
       "      <td>40.0</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>176</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.282227</td>\n",
       "      <td>0.000023</td>\n",
       "      <td>0.067505</td>\n",
       "      <td>0.067505</td>\n",
       "      <td>0.009003</td>\n",
       "      <td>2.029297</td>\n",
       "      <td>-2.939453</td>\n",
       "      <td>0.568848</td>\n",
       "      <td>-1.717773</td>\n",
       "      <td>0.316406</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49999</th>\n",
       "      <td>249999</td>\n",
       "      <td>78202</td>\n",
       "      <td>20090708</td>\n",
       "      <td>32.0</td>\n",
       "      <td>8</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.231445</td>\n",
       "      <td>0.103943</td>\n",
       "      <td>0.096008</td>\n",
       "      <td>0.062317</td>\n",
       "      <td>0.110168</td>\n",
       "      <td>-3.689453</td>\n",
       "      <td>2.033203</td>\n",
       "      <td>0.109131</td>\n",
       "      <td>2.203125</td>\n",
       "      <td>0.847656</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>181137 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       SaleID    name   regDate  model  brand  bodyType  fuelType  gearbox  \\\n",
       "0           0     736  20040402   30.0      6       1.0       0.0      0.0   \n",
       "1           1    2262  20030301   40.0      1       2.0       0.0      0.0   \n",
       "2           2   14874  20040403  115.0     15       1.0       0.0      0.0   \n",
       "3           3   71865  19960908  109.0     10       0.0       0.0      1.0   \n",
       "5           5  137642  20090602   24.0     10       0.0       1.0      0.0   \n",
       "...       ...     ...       ...    ...    ...       ...       ...      ...   \n",
       "49995  249995  111443  20041005    4.0      4       0.0       0.0      1.0   \n",
       "49996  249996  152834  20130409   65.0      1       0.0       0.0      0.0   \n",
       "49997  249997  132531  20041211    4.0      4       0.0       0.0      1.0   \n",
       "49998  249998  143405  20020702   40.0      1       4.0       0.0      1.0   \n",
       "49999  249999   78202  20090708   32.0      8       1.0       0.0      0.0   \n",
       "\n",
       "       power  kilometer  ...       v_5       v_6       v_7       v_8  \\\n",
       "0         60       12.5  ...  0.235718  0.101990  0.129517  0.022812   \n",
       "1          0       15.0  ...  0.264893  0.121033  0.135742  0.026596   \n",
       "2        163       12.5  ...  0.251465  0.114929  0.165161  0.062164   \n",
       "3        193       15.0  ...  0.274414  0.110291  0.121948  0.033386   \n",
       "5        109       10.0  ...  0.260254  0.000518  0.119812  0.090942   \n",
       "...      ...        ...  ...       ...       ...       ...       ...   \n",
       "49995    150       15.0  ...  0.263672  0.000292  0.141846  0.076416   \n",
       "49996    179        4.0  ...  0.255371  0.000991  0.155884  0.108398   \n",
       "49997    147       12.5  ...  0.262939  0.000318  0.141846  0.071960   \n",
       "49998    176       15.0  ...  0.282227  0.000023  0.067505  0.067505   \n",
       "49999      0        3.0  ...  0.231445  0.103943  0.096008  0.062317   \n",
       "\n",
       "            v_9      v_10      v_11      v_12      v_13      v_14  \n",
       "0      0.097473 -2.880859  2.804688 -2.419922  0.795410  0.914551  \n",
       "1      0.020584 -4.902344  2.095703 -1.030273 -1.722656  0.245483  \n",
       "2      0.027069 -4.847656  1.803711  1.565430 -0.832520 -0.229980  \n",
       "3      0.000000 -4.507812  1.286133 -0.501953 -2.437500 -0.478760  \n",
       "5      0.048767  1.885742 -2.722656  2.457031 -0.286865  0.206543  \n",
       "...         ...       ...       ...       ...       ...       ...  \n",
       "49995  0.039276  2.072266 -2.531250  1.716797 -1.063477  0.326660  \n",
       "49996  0.067871  1.358398 -3.291016  4.269531  0.140503  0.556152  \n",
       "49997  0.042969  2.166016 -2.417969  1.371094 -1.073242  0.270508  \n",
       "49998  0.009003  2.029297 -2.939453  0.568848 -1.717773  0.316406  \n",
       "49999  0.110168 -3.689453  2.033203  0.109131  2.203125  0.847656  \n",
       "\n",
       "[181137 rows x 29 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 181137 entries, 0 to 49999\n",
      "Data columns (total 29 columns):\n",
      " #   Column             Non-Null Count   Dtype   \n",
      "---  ------             --------------   -----   \n",
      " 0   SaleID             181137 non-null  int32   \n",
      " 1   name               181137 non-null  int32   \n",
      " 2   regDate            181137 non-null  int32   \n",
      " 3   model              181137 non-null  float16 \n",
      " 4   brand              181137 non-null  int8    \n",
      " 5   bodyType           181137 non-null  float16 \n",
      " 6   fuelType           181137 non-null  float16 \n",
      " 7   gearbox            181137 non-null  float16 \n",
      " 8   power              181137 non-null  int16   \n",
      " 9   kilometer          181137 non-null  float16 \n",
      " 10  notRepairedDamage  181137 non-null  category\n",
      " 11  regionCode         181137 non-null  int16   \n",
      " 12  creatDate          181137 non-null  int32   \n",
      " 13  price              131137 non-null  float64 \n",
      " 14  v_0                181137 non-null  float16 \n",
      " 15  v_1                181137 non-null  float16 \n",
      " 16  v_2                181137 non-null  float16 \n",
      " 17  v_3                181137 non-null  float16 \n",
      " 18  v_4                181137 non-null  float16 \n",
      " 19  v_5                181137 non-null  float16 \n",
      " 20  v_6                181137 non-null  float16 \n",
      " 21  v_7                181137 non-null  float16 \n",
      " 22  v_8                181137 non-null  float16 \n",
      " 23  v_9                181137 non-null  float16 \n",
      " 24  v_10               181137 non-null  float16 \n",
      " 25  v_11               181137 non-null  float16 \n",
      " 26  v_12               181137 non-null  float16 \n",
      " 27  v_13               181137 non-null  float16 \n",
      " 28  v_14               181137 non-null  float16 \n",
      "dtypes: category(1), float16(20), float64(1), int16(2), int32(4), int8(1)\n",
      "memory usage: 13.5 MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>nullrate</th>\n",
       "      <th>nullrate%</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [0, nullrate, nullrate%]\n",
       "Index: []"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my.Nullrate(data[cat_feat])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in cat_feat:\n",
    "    data[i]=data[i].apply(lambda x: int(float(x)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.notRepairedDamage=data.notRepairedDamage.astype(\"int\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 181137 entries, 0 to 49999\n",
      "Data columns (total 29 columns):\n",
      " #   Column             Non-Null Count   Dtype  \n",
      "---  ------             --------------   -----  \n",
      " 0   SaleID             181137 non-null  int32  \n",
      " 1   name               181137 non-null  int64  \n",
      " 2   regDate            181137 non-null  int32  \n",
      " 3   model              181137 non-null  int64  \n",
      " 4   brand              181137 non-null  int64  \n",
      " 5   bodyType           181137 non-null  int64  \n",
      " 6   fuelType           181137 non-null  int64  \n",
      " 7   gearbox            181137 non-null  int64  \n",
      " 8   power              181137 non-null  int16  \n",
      " 9   kilometer          181137 non-null  float16\n",
      " 10  notRepairedDamage  181137 non-null  int32  \n",
      " 11  regionCode         181137 non-null  int64  \n",
      " 12  creatDate          181137 non-null  int32  \n",
      " 13  price              131137 non-null  float64\n",
      " 14  v_0                181137 non-null  float16\n",
      " 15  v_1                181137 non-null  float16\n",
      " 16  v_2                181137 non-null  float16\n",
      " 17  v_3                181137 non-null  float16\n",
      " 18  v_4                181137 non-null  float16\n",
      " 19  v_5                181137 non-null  float16\n",
      " 20  v_6                181137 non-null  float16\n",
      " 21  v_7                181137 non-null  float16\n",
      " 22  v_8                181137 non-null  float16\n",
      " 23  v_9                181137 non-null  float16\n",
      " 24  v_10               181137 non-null  float16\n",
      " 25  v_11               181137 non-null  float16\n",
      " 26  v_12               181137 non-null  float16\n",
      " 27  v_13               181137 non-null  float16\n",
      " 28  v_14               181137 non-null  float16\n",
      "dtypes: float16(16), float64(1), int16(1), int32(4), int64(7)\n",
      "memory usage: 21.1 MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.2 Feature Construction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 特征构造\n",
    "    * 单一特征\n",
    "        * 数值特征（归一化/标准化/分箱离散化/box-cox变换）\n",
    "        * 类别特征（py库：category_encoders）\n",
    "            * 时间序列-年月日/是否假期/周末/rank/count\n",
    "            * 地理信息-背后有没有什么实际含义进行提取/len/rank\n",
    "            * 高基数特征（id）-count/rank/CatBoost Encoder/LeaveOneOut、 WeightOfEvidence、 James-Stein、M-estimator\n",
    "            * 稀疏特征-OneHot, Hashing, LeaveOneOut,target encoding\n",
    "    * 特征组合\n",
    "        * 数值+数值 （多项式组合/gplearn库）\n",
    "        * 类别+数值 （statistic特征）\n",
    "* 标签分析\n",
    "    * 标签分布：是否均匀；是否异常（正态分布？）\n",
    "    * 标签box-cox转化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "#由于数据没有标准化，因此先采用树模型如LGB进行特征选择。\n",
    "def lgb_feat_selection(df,label,k=5):\n",
    "    train_x=df[df.SaleID<200000].drop(label,axis=1)\n",
    "    train_y=df[df.SaleID<200000][label]\n",
    "    clf = lgb.LGBMRegressor(objective='regression',metric= 'mae',silent=1,num_leaves=80,learning_rate=0.03, n_estimators=300)\n",
    "    clf.fit(train_x, train_y,categorical_feature=cat_feat,verbose=0) \n",
    "    keys=df.columns\n",
    "    values=clf.feature_importances_\n",
    "    feim=dict(zip(keys,values))\n",
    "    rank=sorted(feim.items(),  key=lambda d: d[1], reverse=True) #按得分降序\n",
    "    derank=sorted(feim.items(),  key=lambda d: d[1], reverse=False) #按得分升序\n",
    "    rank_keys=[i[0] for i in rank][0:k]\n",
    "    derank_keys=[i[0] for i in derank][0:k]\n",
    "    lgb.plot_importance(clf)\n",
    "    return {\"Kbest\":rank_keys,\"kworst\":derank_keys}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Kbest': ['v_9',\n",
       "  'regDate',\n",
       "  'regionCode',\n",
       "  'model',\n",
       "  'power',\n",
       "  'v_5',\n",
       "  'name',\n",
       "  'v_2',\n",
       "  'v_0',\n",
       "  'price'],\n",
       " 'kworst': ['gearbox',\n",
       "  'fuelType',\n",
       "  'bodyType',\n",
       "  'brand',\n",
       "  'v_3',\n",
       "  'SaleID',\n",
       "  'v_6',\n",
       "  'creatDate',\n",
       "  'v_12',\n",
       "  'notRepairedDamage']}"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkUAAAFoCAYAAABZpI+0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xtcz/f///Hbu4SUcyJkyXl82FDZx2HOp83YKJVlNmNs5TQrI4Xk1BwmH21jxnIIk8O2z2ZjxpwnJqHPnOWUCBUd3z1/f7x+vb9MeEf1Tj2ul8vnctGr1+v9frzu88mj1/v1ejx1SimFEEIIIUQJZ2bqAoQQQgghigJpioQQQgghkKZICCGEEAKQpkgIIYQQApCmSAghhBACkKZICCGEEAKQpkgIUYQ1atSIPn360LdvX8P/Jk2a9NSvFx0dTUBAQD5W+KDt27czffr0Anv9R4mLi8PHx6fQ31eI4qaUqQsQQojHWbFiBVWqVMmX1zp9+jTx8fH58lq56dKlC126dCmw13+UK1eucO7cuUJ/XyGKG50MbxRCFFWNGjVi3759uTZFZ86cITg4mNu3b6PX6/Hy8mLAgAFkZ2czY8YMjh49yt27d1FKMX36dGrWrImHhwfJycl0796dfv36ERQUxA8//ADAgQMHDF+Hhoby119/cf36dRo1asRnn31GWFgYv/zyC9nZ2dSqVYvAwECqV6/+QE2RkZFs3bqVL7/8Ei8vL5o2bcpff/1FYmIibm5u3Lhxg4MHD5KamsqCBQto1KgRXl5evPjii0RFRXHr1i369u3LqFGjANi2bRuLFi0iOzsbKysrPv30U5o3b/5AfQ0aNODYsWPEx8fj5OTE119/zRdffMH27dtJS0sjNTUVPz8/unXrRmhoKJcvXyYhIYHLly9TvXp1QkJCsLW15dy5cwQEBJCYmIiZmRkjR46kd+/exMfHM23aNK5evUpmZiavvfYaI0aMKPj/+EKYghJCiCKqYcOG6vXXX1dvvPGG4X83btxQmZmZqnfv3iomJkYppVRSUpLq1auXOnLkiDp8+LDy8fFRer1eKaXUl19+qT744AOllFIbNmxQw4cPV0optX//fvXaa68Z3uv+rxcuXKh69OihMjMzlVJKbdy4UY0ZM8bwdUREhHr//fcfqvf+13/77beVt7e3Ukqpv/76SzVs2FBt375dKaVUcHCw8vf3N+w3bNgwlZGRoe7cuaN69OihfvvtN3X69Gn173//W128eFEppdTevXtV27ZtVXJy8kP13V/7pUuXlJeXl0pNTVVKKfXDDz+o119/3XBeXbp0UcnJyUoppT744AP1+eefK6WU6tevn1q5cqVSSqkrV64Y9vPy8jLUnZaWpry8vNSPP/6Y1/+UQjwX5OMzIUSRltvHZ6dPn+bixYtMnDjRsC0tLY0TJ07g6elJxYoViYiIIC4ujgMHDmBlZZXn933ppZcoVUr7Ebljxw6OHTtG//79AcjOziY1NfWJr9GtWzcA7O3tAWjfvj0AderU4eDBg4b9Bg4ciIWFBRYWFvTs2ZPdu3fj6OhImzZtDMe+8sorVKlShZiYmIfqu1+tWrWYM2cO33//PRcuXDBcMcvh7OyMtbU1AC+++CJ37tzh9u3bxMbG4urqCoCdnR3btm3j3r17/Pnnn9y5c4fPP/8cgHv37hEbG0vv3r2NjVKI54Y0RUKI545er6d8+fJs3rzZsO3GjRuUL1+e33//neDgYN599126dOmCo6MjW7Zseeg1dDod6r67BzIzMx/4frly5Qx/zs7O5v3338fT0xOAjIwM7ty588Q6S5cu/cDXFhYWue53f3OjlMLMzIzs7Gx0Ot0D+ymlyMrKeqi++x0/fpwPP/yQIUOG0LZtW5ycnJg6darh+2XLljX8OSeDnPe///3Onj1LtWrVUEoRERGBpaUlAImJiZQpU+aJ5y7E80iePhNC
PHfq1q1L2bJlDU3R1atXef3114mJiWHPnj106tQJT09PmjVrxrZt29Dr9QCYm5sbmooqVapw5coVbt68iVKKH3/88ZHv165dO7777jtSUlIA+Pzzz/H19c2389myZQvZ2dncuXOHn376ic6dO/PKK6+we/du4uLiANi3bx9Xr16lRYsWDx1vbm5uaOr+/PNPmjVrxrvvvouzszPbt283nP+jWFtb07RpUzZt2gRoeXp4eJCWlsZLL73EN998A0BSUhIeHh5s3749385diKJErhQJIZ47pUuXZvHixQQHB7N06VKysrIYPXo0rVq1olKlSnz88cf06dOHrKws2rZta7hB+qWXXuI///kP3t7eLFq0CHd3d/r370+1atXo2LEjx44dy/X9XF1diY+Px83NDZ1Oh52dHbNmzcq380lLS2PAgAHcvXsXT09PXnnlFQACAwPx9vZGr9dTtmxZvvjiC8qXL//Q8fXr16dMmTIMGDCAL774gl9++YVevXqRnZ1Np06duHPnjqGhe5S5c+cydepUwsPD0el0BAcHU61aNT777DOCgoLo06cPGRkZvP7667zxxhv5du5CFCXy9JkQQpiQl5cXgwYNomfPnqYuRYgSTz4+E0IIIYRArhQJIYQQQgBypUgIIYQQApCmSAghhBACkKaoxDl+/LipSygSJAeN5KCRHDSSg0Zy0JTEHKQpKmHS0tJMXUKRIDloJAeN5KCRHDSSg6Yk5iBNkRBCCCEE0hQJIYQQQgDSFAkhhBBCANIUCSGEEEIA0hQJIYQQQgCyIKwQQgghnpFer8ff359z585hbm7OzJkzUUoxYcIEdDodDRo0IDAwkN27d7NkyRIAlFJERUXxww8/kJaWRmBgIKVLl6ZJkyZMmjQJM7PCv24jTZEQQgghnsmOHTsAiIiI4MCBA4amaMyYMbi4uBAQEMD27dvp1q0bHTp0AGDp0qW0bNmSevXq8dZbb+Hv70/Lli2ZP38+33//PX379i3085CPz4q4GTNmsGbNGsPX69at46233sLNzc3wlzAvypQpk5/lPbckB43koJEcNJKDRnLQ5CWHrl27EhQUBMCVK1ewsbHh+PHjODs7A9ChQwf27t1r2P/atWts3rwZb29vAOLj42nZsiUALVu2JCoqKr9OI0/kSlERlZiYiK+vL+fPn2fo0KEAJCQkEB4ezoYNG0hPT8fT05O2bdtSunRpo193zoG7xP38ewFV/ZyRHDSSg0Zy0EgOGslB84Qc7Ctb8u1QFwBKlSqFn58fv/76KwsXLmTHjh3odDoArKysSE5ONhz3zTffMGTIEMO/X/b29hw8eBBnZ2d27NhBampqwZzPE0hTVMi8vb0ZPHgwzs7OREdHExYWRlhY2EP73b17Fx8fH3bt2mXYFh0dzcsvv0zp0qUpXbo0derUITY2lubNmz/2PUNDQ1m0aBEALr7f5u8JCSGEKNFiYmJIT08HwM3Nje7du+Pn58e9e/cMV3yio6NJS0sjKiqK7Oxsfv75Zzp27Gj4/qBBgwgJCcHMzAxHR0cyMzPz9WpRq1atjNpPmqJC5urqysaNG3F2dmbjxo24ubnlup+9vT329vYPNEUpKSmUL1/e8LWVlRUpKSlPfE8fHx98fHwA7S9vs2Yuz3gWzz8th2amLsPkJAeN5KCRHDSSgyYvOWzatIn4+Hg++OADUlJSKFOmDA0bNiQrKwsXFxc2b95M7969adWqFbGxsTRp0oRXXnnFcHx0dDSLFi2ievXqBAUF0bNnT6Mbmfwk9xQVsvbt23Ps2DFu377NoUOHDDecGcPa2pq7d+8avr579+4DTZIxcrr5kk5y0EgOGslBIzloJAdNXnLo3r07J06cYNCgQQwdOpSJEycSEBBAaGgoAwcOJDMzkx49egBw7tw57O3tHzj+hRdeYPjw4bi7u2Ntbc2rr76ar+diLLlSVMjMzMzo2bMnU6ZMoWvXrpibmxt9bPPmzVmwYAHp6elkZGRw5swZ
GjZsWIDVCiGEEE9Wrlw5Pv/884e2r1y58qFtvXr1olevXg9s69y5M507dy6w+owlTZEJ9O/fn65du7J169Y8HVetWjW8vLzw9PREKcXYsWPlKQkhhBAin0hTZAJ2dnYcP37cqH1z7gXK4ebm9sj7kIQQQgjx9KQpMqErV67g5+f30HYnJydGjRplgoqEEELkp8zMTCZOnMjly5fJyMhg5MiRvPTSS/j7+5OUlIRer2fOnDnUqVMHgOzsbIYPH06XLl3w8PDg3r17fPzxx9y5cwdLS0tCQkKoUqWKic+q+JKmyAS8vLxITU3F0tKSzMxMateuzaRJk6hcufIjj1m7di1vvfUWFhYWhVipEEKIZ7FlyxYqVapESEgIt27d4s0336RNmzb06dOH3r17s3//fs6ePWtoihYsWMCdO3cMx69bt46mTZvi7e1NZGQkixcvxt/f31SnU+xJU2Qis2fPpl69eoD2f5qcu/Qf5csvv6Rfv37P/L5yD5JGctBIDhrJQSM5aPIzh549exqeugIwNzfn8OHDNGrUiCFDhlCrVi0mTZoEwM8//4xOp3vgqeQhQ4ag1+uB/5sULQqONEXPIDIykg0bNpCdnY2XlxcrVqzAzMyMVq1aMX78eBITExk/fjwZGRnUrVuX/fv38+uvvz70Om+88YbhqbKjR48aBi2mpaUxe/ZsDh06REJCAmPHjmXx4sXMnTuXP//8E6UUQ4YMeegu/seRidb3kRw0koNGctBIDpp8yOH+ac8pKSmMGjWKMWPGMGHCBCpUqMDy5ctZtGgRS5YsoVevXvzwww8sXLiQ//znPw+8jrm5OYMHD+bvv//mm2++eea6xKNJU/SMKlSowMyZM/H09GTDhg1YWlryySefsGfPHnbu3EmXLl0YNGgQe/bsYc+ePY99naSkJE6dOkVISAjVq1fniy++4Oeff2bkyJGEhYUxf/58du7cyaVLl4iIiCA9PR03Nzfatm1LhQoVHvnaMtFaCCFMIyYmhitXrjBv3jy6detGzZo1sbKyomrVqkRFRWFnZ8fatWu5ePEiZ8+e5a233iIhIYFSpUqRmppKixYtABg9ejSXL19m+PDhLFiwoNDqN9UaZPlNJloXkrp163Lx4kUSExMZPnw4oA1VjIuL48yZM7z55psAtG7d+pGvoZTixo0bVK1alerVqxMcHEy5cuUeWCAvx99//83x48fx8vICICsriytXrjy2KZKJ1g+TibUayUEjOWgkB01+5nDjxg0++eQTAgMDDROc27Rpw+3bt+nYsSMxMTG0atXqgYduQkNDsbGxwcPDgy+//JLq1avTr18/atasSbly5Qpt0nNUVJRJpkqbkjRFz8jMzIzatWtjZ2fHsmXLsLCwIDIykiZNmnDhwgWOHDlCkyZN+Ouvvx75Gt999x1t2rTBzMwMf39/tm3bhrW1NX5+fiilANDpdGRnZ+Po6IiLiwtBQUFkZ2ezePFiateubXS9MqlVIzloJAeN5KCRHDT5mcMXX3xBUlISixcvZvHixQDMmjULf39/IiIisLa2Zu7cuY88vn///vj5+bFhwwb0ej0zZszIt9rEw6QpygdVqlRhyJAheHl5odfrqVWrFr169WLYsGH4+vry008/YWtrS6lS/xe3n58flpaWAFSvXp3AwEAA+vbti5ubGxUqVMDGxobr168D2pWm4cOH8+2333Lw4EE8PT25d+8eXbt2xdrauvBPWgghxBP5+/vn+rTY4+4Nun8+nY2NDV9//XWB1CYeplM5lyJEvtu5cyeVK1emefPm7N27ly+++IJvvzXtPT0l8XJobiQHjeSgkRw0koNGctCUxBzkSlEBql27NhMnTsTc3Jzs7GzDY5dCCCGEKHqkKSpA9erVY+3ataYuQwghhBBGkKZICCGEeAa5LeVRo0YNRowYgYODAwAeHh707t2bmTNnEhUVhZmZGX5+frRq1Yq4uDgmTJiAUoqaNWsSFBRkuOdUFC4zUxfwvBk7
diwZGRl5Pu7UqVMMHz4cLy8v+vfvz8KFCzH2dq5du3YxYcKEPL9nbmRirUZy0EgOGslBIzlo8ppDzlIeq1evZsmSJQQFBXHixAneffddwsPDCQ8Pp3fv3sTGxnLkyBHWr1/PnDlzCA4OBiAkJAR3d3dWr16Ni4uLDGg0IblSlEfz58/P8zFJSUmMGzeO0NBQHBwc0Ov1jB49moiICDw8PAqgykeTidb3kRw0koNGctBIDhojcsiZWJ3bUh4xMTGcO3eO7du388ILLzBx4kRsbW0pW7YsGRkZpKSkGJ5IPn36NEFBQQC0bNlSHrs3oRLbFD3tEh2dO3fmp59+IiEhgUmTJpGVlYVOp8Pf35/GjRvTvXt3WrZsyblz56hatSqhoaFs374dFxcXw2VUc3NzZs+ebVjcddasWYapoa+//jrvvPMOZ86cYeLEiVhaWmJpaUnFihUB+Omnn1i+fPkDtT6JTLQWQoiCERMTY5hrlJqayty5c+nbty+ZmZn06dMHR0dHNm3aREBAAP369SMlJYXOnTtz7949hg0bRlRUFNWrV2fZsmV06NCBLVu2kJCQUGQmSReVOp6V0U/RqRJqw4YNasSIEerWrVuqV69e6t69e0oppcaPH692796tgoOD1cqVK5VSSu3evVt16tRJKaVUp06dVFpamvLx8VG//vqrUkqpEydOqDfffFMppVTjxo3VlStXlFJKDRw4UB05ckR98cUXavny5bnW8dtvv6mPPvpIZWdnq4yMDDVgwAAVGxurfHx81O7du5VSSn355ZfKz8/vkbXmxbFjx/K0f3ElOWgkB43koJEcNE+Tw5UrV9Sbb76p1q9fr5RS6s6dO4bvnTp1Sg0ePFitWLFCffzxxyorK0vduXNHvfbaa+ratWsqPj5eeXt7q/fee08tWbJEffjhh/l2Ls/i0KFDpi6h0JXYK0XwbEt0nDlzBicnJwCaNGnCtWvXAKhcuTJ2dnYA2NnZkZ6eTs2aNTlx4sQDx8fFxXHt2jXOnDlD69at0el0WFhY0KJFC86cOcOpU6do3rw5oF1OPXv27CNrzQuZWKuRHDSSg0Zy0EgOmrzmcOPGDd577z0CAgIMS3kMHTqUyZMn07x5c/bt20fTpk2pUKEC5cqVw9zcHCsrK0qXLs3du3eJjo7mo48+onHjxixbtox///vfBXFawggluil6liU66tWrx6FDh+jSpQsnT57ExsYG0Jbj+KdOnTrx5Zdf4uHhQZ06dcjMzGTWrFn8+9//pl69ekRGRjJkyBAyMzM5cuQIb775Jo6Ojhw5coQOHToQExMD8MhahRBCmE5uS3lMmDCBGTNmYGFhgY2NjeGJssOHD+Pu7o5erzd8vJacnMzEiRMpXbo0DRo0ICAgwMRnVHKV6KYInm6JDgBfX18mT57MsmXLyMrKMjxFkBtra2vDWjdKKe7evUunTp3w9PREp9Nx8OBBBg4cSGZmJj179qRp06YEBgYyduxYvv76a6pUqUKZMmUeWasQQgjTedRSHhEREQ9tmzZt2kPbWrRoQWRkZIHUJvJGlvl4hKK4REd+KIlj23MjOWgkB43koJEcNJKDpiTmUOKvFD2KLNEhhBBClCzSFD2CLNEhhChpsrKy+OSTTx6YzFy/fn0mTJiATqejQYMGBAYGYmZmxqJFi/j9998pVaoUEydOpHnz5pw8eZKgoCDMzc0pXbo0s2fPNtxvKcTzQCZaF2Hp6el07tz5kd8/cOAAY8eOzdNrysRajeSgkRw0koNm9+7dD01mnjlzJmPGjGH16tUopdi+fTvHjx/n4MGDrF+/nnnz5jF16lQAgoODmTx5MuHh4XTr1o0lS5aY+IyEyBu5UlTCyETr+0gOGslBU8JzsK9siU+HDg880Wpubs7x48dxdnYGoEOHDuzZs4e6devSrl07dDodNWvWRK/Xk5iYyLx587C1tQVAr9dLsymeO9IUFaDIyEh27NhBWloaCQkJDB48mO3bt3Pq1Cl8
fX25d+8eK1asoHTp0jg4ODBt2jQyMjIYP348SUlJ1KlTx/Ba//vf/5g+fToAlSpVeuox8HG3Ujl3426+nJ8QonixtLTB2tqalJQURo0axZgxY5g9e7Zh1IiVlRXJycmkpKRQqVIlw3E521944QUADh8+zMqVK1m1apVJzkOIpyVNUQG7e/cuy5Yt48cff2T58uWsW7eOAwcOsHz5cs6cOcPGjRuxtrZmxowZhnuYGjZsyNixYzl69CgHDhwAYPLkycyYMYP69euzfv16li5davSAL1nmQwjxJPaVtVXZf/nlF+bNm0e3bt0MV4FylnqIjo4mLS2NW7ducf36dcP2hIQEzp49y40bN9i3bx+bNm1i3LhxnDt3jnPnzpnsnJ5FcVne4lkVlxyMfYpOmqIClnMpunz58tSrVw+dTkfFihVJTU2lfv36WFtbA+Dk5MTu3bsBaN++PaDNrsiZj3TmzBnD5/aZmZnUrVvX6Bp8fHzw8fEBtHV6mjVzyZ+Te45pOTQzdRkmJzloJAfNb7/9xvz58wkMDDRMZm7RogVZWVm4uLiwefNmevfuTZ06dQgJCeHll1/m2rVrlC1blk6dOrF582b27t3Ld99998CVpOdNSXwUPTclMQdpigpYbhOuc7afOXOGe/fuUa5cOQ4ePEjdunXR6XT89ddfdO3alRMnTpCVlQVoS5LMnj2bmjVrEhUVRUJCwlPVI2P8NZKDRnLQSA6aTZs2PTSZedKkSUyfPp158+bh6OhIjx49MDc3p3Xr1gwcOJDs7GwCAgLQ6/UEBwdjZ2dn+CXMycmJUaNGmfKUhMgTaYpMxNzcHB8fHwYPHoyZmRl16tRh/PjxmJub8+mnn+Lh4YGjoyMWFhYATJkyBT8/P/R6PaA95XH9+nVTnoIQoph55513WLhw4UPbV65c+dC2+69A5zh48GCB1SZEYZCJ1iVMSbwcmhvJQSM5aCQHjeSgkRw0JTEHmVMkhBBCCIF8fCaEeM4dPXqUzz77jPDwcE6fPs3kyZNRStG4cWMmT56Mubk506dP5/Dhw1hZWQGwePFikpOT8fX1RSmFUoqvv/4aS0tLE5+NEMKU5EqREOK5tWTJEvz9/Q03Ss+bN49x48YRERFBWloav/32GwDHjx9n6dKlhIeHEx4eTvny5Vm+fDm9evVi1apV1K5dm++++86UpyKEKAKkKSphZMKsRnLQPO851KlTh9DQUMPXoaGhODk5kZGRQUJCAlWrViU7O5sLFy4QEBCAu7u7oflp0qQJSUlJAKSmphrGXwghSi75KVCAIiMj2b59OykpKdy6dYuPPvoIa2trFixYQJkyZQyTqSdMmMDIkSP517/+RY8ePRg/fjzdunXjvffeY+bMmRw+fJjly5djZmZGq1atGD9+PKGhoRw5coR79+4RHBxMvXr1jKpJlvm4j+SgeQ5zsK9sybdDXejRoweXLl0ybDc3N+fy5cu8++67WFtbU7duXe7du8fbb7/Nu+++i16vZ/DgwTRr1owaNWowd+5cfvjhB5KTk5k2bZoJz0gIURRIU1TA7t27xzfffENiYiKurq7odDrWrFlD9erVWbFiBWFhYXTv3p1du3ZRqVIlypQpw549e2jTpg3p6emUKVOG0NBQNmzYgKWlJZ988gl79uwBwNHREX9//yfWIBOtRXEUExNDeno6CQkJ3L1794HJuzNnzmTHjh188sknfPDBBzRv3pwTJ04A2syvn3/+mZ9++on33nuPFi1acOTIEUaMGIGvr6+pTqfIKC4TjJ+V5KApLjnIROsiwsnJCTMzM2xsbChXrhxZWVlUr17d8L158+YxYsQIPvzwQypXrsywYcP45ptv2LVrF506deLixYskJiYyfPhwQFs2JC4uDsDoqdYy0fphMsFYUxxyuHTpElZWVrRq1YoRI0YwYcIEHBwciI+P5+bNm1SpUoWxY8eyceNGsrOzCQkJYcSIERw5coSWLVvy0ksvcf78eXQ6XYl7/Pif
SuIj2LmRHDQlMQdpigrY8ePHAbhx4wapqakAXL9+HVtbWw4ePIiDgwMVK1akbNmy/PTTT4SGhrJ161ZWrFjBZ599hrW1NXZ2dixbtgwLCwsiIyNp0qQJ27Ztw8ws77eEyeRejeSgKW45DB8+nAkTJmBhYYGlpSXTp0/H1taWPn364ObmhoWFBX379qVBgwZMnjyZadOmkZ2dTXJyMjNnzjR1+UIIE5OmqIDduHGDd955h+TkZKZMmUKpUqXw8fExrIGW84O4S5cuREZGUqlSJdq1a8fq1aupU6cOAEOGDMHLywu9Xk+tWrXo1auXKU9JiCKldu3arFu3DoCWLVsSERHx0D7Dhg1j2LBhD2yrX78+336rfZwcFRXFiy++WPDFCiGKNGmKCpiTkxPjx49/YFtuq9t7enri6ekJgLu7O+7u7obv9e3bl759+z6w/z/H6wshhBDi2cgj+UIIIYQQyJWiAvXWW2+ZugQhhBBCGEmuFAkhTO7o0aN4eXkBcPLkSTw9PfHy8mLo0KHcuHHDsF92djbvv/8+a9asASAtLQ0fHx88PT0ZNmwYiYmJJqlfCFE8SFP0nLl58yYjR45k0KBBuLu7c/HixTwd/7xPMM4vkoOmKOTwz6U6goODmTx5MuHh4XTr1o0lS5YY9l2wYAF37twxfL1mzRoaNmzI6tWr6devH4sXLy70+oUQxYd8fPacCQkJoU+fPvTu3Zv9+/dz9uxZw1NqxpCJ1veRHDQmyiFnKnXOUh05gxPnzZuHra0tAHq93tC4/fzzz+h0Ojp06GB4jaioKN5//30AOnToIE2REOKZSFNURHh7ezN48GCcnZ2Jjo4mLCyMsLCwh/Y7fPgwjRo1YsiQIdSqVYtJkyY98bVlorUoqmJiYrCxsSE2NvaBqdRxcXH8/fffLF26lICAADZt2sR3333H6NGjiYyM5N69e0RFRXHlyhUuXryIXq8nOzubxMTEp57AW1wm9z4ryUEjOWiKSw4y0fo54+rqysaNG3F2dmbjxo24ubnlut/ly5epUKECy5cvZ9GiRSxZsoTRo0c/9rVlovXDisMk5/xQVHK4fyo1wH//+19Wr17Nt99+i729PXPmzCE9PZ2FCxdy+fJlLCwseOWVV6hZsyZFsqzAAAAgAElEQVQODg40b96c5ORkqlat+lQTeEvi5N7cSA4ayUFTEnOQpqiIaN++PSEhIdy+fZtDhw49ck2zSpUq0blzZwA6d+7M/Pnz8/Q+xW2C8dOSHDRFMYfNmzezdu1awsPDqVSpEsADa5KFhoZiY2NDhw4dOH36NDt37qR58+bs2rWrxP0AF0LkL7nRuogwMzOjZ8+eTJkyha5du2Jubp7rfq1atWLnzp0A/Pnnn9SvX78wyxSiQOn1eoKDg7l79y4+Pj54eXmxcOHCR+7v4eHBqVOn8PDwYO3atXh7exditUKI4kauFBUh/fv3p2vXrmzduvWR+/j5+eHv709ERATW1tbMnTu3ECsUomDcv1THwYMHH7vv/dPcLS0tH9s0CSFEXkhTVITY2dkZFpB9lFq1avHNN98UUkVCCCFEySFNURF05coV/Pz8Htru5OTEqFGjTFCREEIIUfxJU1QE1axZk/DwcFOXIcQTHT16lM8+++yBv68zZsygbt26eHh4ALBu3ToiIiIoVaoUI0eOpFOnTly5cgVfX1+UUlSsWJG5c+diaWlpqtMQQghAbrQucYrCBOOiQHLQPEsO/5xEnZiYyPvvv89vv/1m2CchIYHw8HAiIiL4+uuvmTdvHhkZGSxfvpxevXqxatUqGjRowHfffffM5yKEEM9KrhSVMDLR+j6SgyaPOTxqEnXOE2O7du0y7BsdHc3LL79M6dKlKV26NHXq1CE2NpYmTZpw7do1AFJSUqhRo0a+nY4QQjwtaYoKQWRkJDt37iQtLY2LFy8ybNgwateubZgynZaWxuzZs7GwsGDs2LHY2dlx6dIlXnvtNU6dOsWJEyfo2LEj48aN
43//+x/Tp08HtJlFM2bMoHz58o99f5loLfLboyZRg3ZPXM7E6ZiYGMOfQfu7fvjwYXQ6HcuXL2f9+vVkZWXRvn17k0/ONfX7FxWSg0Zy0BSXHIyeYaZEgduwYYN67733lFJKnTt3TvXo0UOtXLlSXbt2TSmlVFhYmFq8eLGKi4tTLi4uKikpSV2/fl3961//Urdu3VJpaWnqlVdeUUop5erqqk6dOqWUUmrdunVq3rx5earl2LFj+Xhmzy/JQfOsOcTFxSlXV9cHti1cuFCtXr1aKaXUtm3bVGBgoOF7H374oYqOjlb9+vVTu3btUkoptWPHDjVs2LBnquNZHTp0yKTvX1RIDhrJQVMSc5ArRYWkcePGgPbYfUZGBtWrVyc4OJhy5coRHx9Py5YtAbC3t6d8+fKULl0aGxsbw0RfnU4HwJkzZ5g6dSoAmZmZ1K1bN091FMUJxqYgOWgKOofmzZuzYMEC0tPTycjI4MyZMzRs2JAKFSoYrnDa2tqSlJRUoHUIIYQxpCkqJDlNTQ5/f3+2bduGtbU1fn5+KKVy3e+f6taty+zZs6lZsyZRUVEkJCQUWM1CPKtq1arh5eWFp6cnSinGjh1LmTJlmDx5MtOmTSM7OxulFAEBAaYuVQghpCkylb59++Lm5kaFChWwsbHh+vXrRh03ZcoU/Pz80Ov1AAQHBxdkmUI81v2TqHPcP3EawM3N7aEFjuvXr8+338r9bUKIokWaokLw1ltvGf5cpkwZwyPLn3766UP75vwDc/9+AHv27AGgWbNmMsNICCGEKAAyp0gIIYQQAmmKhBBP4ejRo3h5eQFw4cIFPDw88PT0JDAwkOzsbABmzpzJgAEDcHNzMzzWGxcXx6BBg/D09GT8+PGkpqaa7ByEEOKfpCkSQuTJPydZz5w5kzFjxrB69WqUUmzfvp3Y2FiOHDnC+vXrmTNnjuHet5CQENzd3Vm9ejUuLi6yuLEQokiRpug5c/r0aTw8PHB3d2fKlCmGG66NJctbaCQHzdPkkDPJOsfx48dxdnYGoEOHDuzduxdbW1vKli1LRkYGKSkplCql3b54+vRpOnToAEDLli2LzWA4IUTxIDdaP2fmzZvHuHHjcHJyYsKECfz2229069bN6ONlmY/7SA4aI3PIWd6jR48eXLp0ybBdKWUYJWFlZUVycjKlSpXCzMyMXr16kZycTFBQEABNmjTht99+480332T79u3y8ZkQokiRpqiI8Pb2ZvDgwTg7OxMdHU1YWBhhYWEP7RcaGoq5uTkZGRkkJCRQtWrVJ762LPMh8ktMTAzp6ekkJCQYlvfQ6/WGKz7R0dGkpaURGhqKmZkZs2fPJjU1lalTp2Jubk6vXr1Yvnw5q1evpmnTpuh0uiJztaio1GFqkoNGctAUlxyMXeZDmqIiwtXVlY0bN+Ls7MzGjRsfmuuSw9zcnMuXL/Puu+9ibW1t1ERrHx8fw+yYmJgYmjVzydfan0daDs1MXYbJPW0Oly5dwsrKilatWtGiRQuysrJwcXFh8+bN9O7dm4yMDNLT03FyckKv11OxYkXq1atHdHQ0/v7+NG7cmGXLltGgQQPj1yQqQFFRUUWiDlOTHDSSg6Yk5iD3FBUR7du359ixY9y+fZtDhw4Z7rvITa1atfjll1/w8PBg1qxZeXofWd5CIzlo8iMHPz8/QkNDGThwIJmZmfTo0YM+ffoA4O7ujru7O3369MHR0ZG6desyceJE3N3dOXfu3CObfyGEMAW5UlREmJmZ0bNnT6ZMmULXrl0xNzfPdb8RI0YwYcIEHBwcsLKywsxM+lpR+O6fZF23bl1Wrlz50D7Tpk17aFuLFi2IjIws8PqEEOJpSFNUhPTv35+uXbuydevWR+4zfPhwJkyYgIWFBZaWlkyfPr0QKxRCCCGKL2mKihA7OzuOHz/+2H1atmxJREREIVUkhBBClBzSFBVBV65cwc/P76HtTk5OjBo1ygQVCSGEEMWfNEVFUM2aNWXRV1Gk
HD16lM8++4zw8HAuXLjAhAkT0Ol0NGjQgMDAQMzMzBgxYgS3b9/GwsKCMmXKsHTpUk6ePElgYCDm5uY4ODgQHBws98EJIYos+en0nJoxYwZr1qzJ83EyyVkjOWiMycGYZT0ALl68yJo1awgPD2fp0qUALFq0iI8++og1a9aQkZHB77//XmDnIoQQz0quFD1nEhMT8fX15fz58wwdOjTPx8tE6/tIDppH5JAzwTpnWQ9fX1/g4WU99uzZw8svv0xSUhIjRowgKSmJ4cOH06lTJ5o0acLt27dRSnH37l3Dch9CCFEUyU+oIsLYidZ3797Fx8eHXbt2Gf3aMtFaPK2YmBhsbGyIjY01TLDOyMjg8OHDgHb/24ULFzh8+DDdu3enZ8+epKSkEBgYiFIKpRRTp05l/vz5lCtXDgsLiyI7Ibeo1lXYJAeN5KApLjnIROvnjLETre3t7bG3t89TUyQTrR8mE601xuZw/wTrMmXKGH7A3Lp1CwcHBzp16kS7du0oV64cAJs3b6ZChQqsXr2atWvX0qBBA1atWsUvv/xCYGBggZ7T0yiJk3tzIzloJAdNScxB7ikqIvIy0fpZyCRnjeSgeZocXnzxRQ4cOADArl27aN26NXv37mXMmDGAdjXz1KlTODo6UrFiRaytrQGwtbUlKSkp/4oXQoh8JleKighjJ1oLYWp+fn5MnjyZefPm4ejoSI8ePTA3N2f37t24ublhZmbGuHHjqFKlCtOnT2fs2LGUKlUKCwsLgoKCTF2+EEI8kjRFRYgxE62FMAVjlvWYNGnSQ9tat24tw0aFEM8NaYqKEGMmWufIuUdICCGEEPlDmqIiSCZaCyGEEIVPmqIiSCZai6LAmCnW8+fPZ+/eveh0Ovz9/WnevLnh+D///JPx48ezc+dOE56FEEIYT54+K2FkkrNGctA8KgdjplifOHGCv/76i3Xr1jFv3jz8/f0Nx1+9epVly5aRlZVVKOchhBD5Qa4UPWce9Ru7sWSi9X0kB83/zyFngjVg1BTrwMBAvv76a3Q6HVeuXMHGxgbQHvMPDAwkKCiIt956q/DPRwghnpI0Rc+ZnN/YXVxcCAgIYPv27XTr1u2xx8hEa2GsmJgY0tPTjZpinTPpdu3atWzdupV33nmHqKgovvrqK9q1a8elS5fIzMx8bibiPi91FjTJQSM5aIpLDkYPoVSiSPjoo4/UgQMHlFJKHT16VI0YMSLX/dq1a6eys7OVUkr9+uuvasqUKXl6n2PHjj1bocWE5KB5XA5xcXHK1dVVKaVU+/btDdt//fVXNXXq1Af2TU5OVr1791aHDh1S3bt3V2+//bZ6++23VdOmTdWYMWMKpvh8dOjQIVOXUCRIDhrJQVMSc5B7ioqInGU+gMcu86GUQqfTAWBlZUVycnKe3kcmOWskB42xOeQ2xXrfvn1MnToV0O5NKlWqFLa2tmzdupXw8HDCw8OpWLEi8+fPL7D6hRAiP0lTVEQYu8zH/fcP3b17lwoVKhRWiaIE8/PzIzQ0lIEDB5KZmUmPHj1wdnYmOzsbd3d3Bg0axKBBg7C3tzd1qUII8dTknqIiwthlPnJ+Y3dxcWHXrl20adOmkCsVJYUxU6xzrhQ9yp49ewqkNiGEKAhypagI6d+/P7/++iv9+/d/5D65/cYuhBBCiGcnV4qKEGOW+XjUb+xCCCGEeDbSFBVBssyHMAVjJlgvWrSI33//nVKlSjFx4kSaN2/OzZs38ff3JykpCb1ez5w5c6hTp46pT0cIIfJMmqIiSJb5EIVtyZIlbNmyBUtLSyD3eVg1a9bk4MGDrF+/nqtXr+Lj48OGDRsICQmhT58+9O7dm/3793P27FlpioQQzyW5p6iImzFjBmvWrDF8vXz5clxdXXF1dTUMZMwLWd5CIzlocnLImWCd458TrPfu3UtUVBTt2rVDp9NRs2ZN9Ho9iYmJHD58mPj4eIYMGcL3339vOE4IIZ43cqWoiEpMTMTX15fz588zdOhQ
AOLi4tiyZQvr169Hp9Ph6elJ165dady4sdGvK8t83Edy0Jb2aAY9evTg0qVLhu25zcNKSUmhUqVKhn1ytl++fJkKFSqwfPlyFi1axJIlSxg9enShn4sQQjwraYoKmbe3N4MHD8bZ2Zno6GjCwsIICwt7aL+7d+/i4+PDrl27DNtq1KjB0qVLDY/rZ2VlGXXFQ5b5EI+Ts7RHQkKCYVkPvV5vGO8fHR1NWloat27d4vr164btCQkJnD17FisrK6pWrUpUVBR2dnasXbuWdu3amfKUnkpxWc7gWUkOGslBU1xyMHaZD2mKClnO5GpnZ+fHTq62t7fH3t7+gabIwsKCKlWqoJRizpw5vPjii9StW/eJ7+nj44OPjw+g/QPYrJlL/pzMc0zLoZmpyzC5+3O4dOkSVlZWtGrVihYtWpCVlYWLiwubN2+md+/e1KlTh5CQEF5++WWuXbtG2bJl6dSpE23atOH27dt07NiRmJgYWrVqZfw6Q0VEVFTUc1dzQZAcNJKDpiTmIPcUFTJjJ1c/Snp6OuPHj+fu3bsEBgbm+f1leQuN5KB5VA65zcNq1qwZrVu3ZuDAgfj4+BAQEGDYd/Pmzbi7u/PHH38wYsSIwjwFIYTIN3KlqJAZO7k6N0opPvzwQ1xcXBg+fHgBVilKImMmWN9/1TFHrVq1+OabbwqlRiGEKEjSFJlA//796dq1K1u3bs3Tcdu2bePgwYNkZGTwxx9/ADBu3DhefvnlgihTCCGEKFGkKTIBYyZX57j/t/Ju3bpx7NixgipLCCGEKNGkKTIhmVwtCkpGRgaffvopcXFxWFtbExAQgIODA3q9nrFjxzJgwADD/WwzZ84kKioKMzMz/Pz8StyNlUIIkUOaIiNFRkZy9uxZxo8fD8CuXbu4evUqbdu2Zdy4cYZ7MfLicZOr09PT2bJlC66urs9UtyiZ1q1bR7ly5Vi3bh1nz54lKCiIwMBA/Pz8uHbtGgMGDADgwoULHDlyhPXr13PhwgXGjRtHZGSkiasXQgjTkKfPnlKHDh0YOHBggb1+QkIC69evz/fXlUnOmuKew+nTpw1XghwdHTlz5gz37t1j+vTpuLj830iGypUrU7ZsWTIyMkhJSaFUKfk9SQhRcslPwDxKTEzkww8/pH///ly4cAF3d3fD9/bs2cOCBQsoU6YMlSpVYsaMGZw8eZKvvvoKCwsLrl27hru7O/v37yc2NpbBgwfj6enJwYMHmT9/Pubm5tjb2zNt2jS++OILTp8+zaJFi3jnnXeYNGkSt27dAsDf359GjRrRqVMnHB0dcXR0ZNKkSUbVLxOt71MMc7CvbMm3Q11o0qQJO3bsoGvXrhw9epT4+HgaNGjw0NOO5ubmmJmZ0atXL5KTkwkKCjJR5UIIYXrSFOXBzZs3GTlyJBMnTuTMmTMPfE8pxeTJk1mzZg3Vq1dnxYoVhIWF0bFjR65du8amTZs4fvw4o0eP5tdffyU+Ph5vb288PDyYPHkyq1evpmrVqixYsICNGzcyYsQI/v77b7y9vQkJCaFNmzZ4enpy/vx5Pv30U9asWcPVq1eJjIykcuXKj61bJlqXLDExMdStW5d9+/bx5ptv0rBhQxwcHPjrr78A7e/x6dOnsbKy4o8//sDMzIzZs2eTmprK1KlTMTc3p0qVKiY+i8JXXCb3PivJQSM5aIpLDjLRugD88ccfVKtWjezs7Ie+d+vWLaytralevTqg3Sw9b948OnbsSIMGDbCwsKB8+fLUqVOH0qVLU7FiRdLT00lMTOT69euMGTMGgLS0NNq2bfvAa//999/s37+fn376CYCkpCRA++jjSQ0RyETr3BT3idZHjhyhV69e9OjRg2PHjrFs2TLDD4WqVatSv359WrVqxR9//EHt2rVxcnJCr9dTsWJF6tWrh6Ojo4nPoHCVxMm9uZEcNJKDpiTmIE1RHvTr149+/foxevRoPD09H/he5cqVSUlJ4fr169ja2nLw4EEcHBwADAtr5qZy
5crUqFGDxYsXU758ebZv3065cuUwMzMzNF+Ojo688cYb9OnTh5s3bxruNTIzy/stYTLJWVPcc3jhhRf4/PPPWbZsGeXLlyc4ODjX/dq2bcv333+Pu7s7er2ePn36lLiGSAghckhTlEf169fnjTfeYObMmQwZMsSwXafTMX36dHx8fNDpdFSsWJGZM2dy6tSpx76emZkZkyZNYvjw4SilsLKyYs6cOVhbW5OZmUlISAgjRoxg0qRJrFu3jpSUFLy9vQv4LMXzrkqVKixfvjzX782aNcvwZzMzM6ZNm1ZIVQkhRNGmU0opUxchCk9JvByaG8lBIzloJAeN5KCRHDQlMQd5JF8IIYQQAmmKhBBCCCEAI+8pio6OJioqikGDBjFixAhOnDjBnDlzDMPhhCiO+vXrR/ny5QFtBflOnToxZ84c7OzsAO2pPmdnZ0B7zP2tt95i2bJl1KtXz2Q1CyGEeHpGNUXTp09n1KhRbN26lbJly7Jx40a8vb2lKXoOFfdJzsZ6Ug45T6fdvwzL/Pnz+eSTT+jRo8cD+2ZmZhIQEEDZsmXzv1AhhBCFxqimKDs7m3bt2vHxxx/TvXt37Ozs0Ov1BV2bAGbMmEHdunXx8PAwbMvOzmb48OF06dLlge3GkInW98klh5yJ0LGxsaSmpvLee++RlZXFuHHjOH78OCdPnmTFihU0b96c8ePHU6pUKWbPno27uztfffVV4Z+DEEKIfGNUU2RpacmyZcvYv38/AQEBfPvtt1hZWRV0bSVaYmIivr6+nD9/nqFDhz7wvQULFnDnzh2jX0smWudNTEwMZ8+epWvXrnTq1Ilr167h4+NDly5dcHZ2xtbWlq+//po5c+ZQtmxZ0tLSKFeuHMnJyRw/fpzbt2+b+hTypLhMrH1WkoNGctBIDprikoOxT9EZ9Uh+fHw869evp23btrz88suEhITg5eVFjRo1nrnQksbb25vBgwfj7OxMdHQ0YWFhhIWFPbRfXFwciYmJ7Nq1CxsbG8MVoZ9//pmTJ09SqlSpB7Ybq7hPcjbWk3LIyMggOzvb8JHYgAEDCA0NNdxPtHPnTrZu3cqFCxfQ6XTodDpOnjyJg4MDYWFhVKtWrVDO41mVxEducyM5aCQHjeSgKYk5GPX0WfXq1WnTpg2xsbFkZGTQsWNHaYiekqurKxs3bgRg48aNuLm55bqfvb09LVq0eGDb33//zQ8//MDo0aOf+v2L+yRnYz0ph++++84w5DA+Pp7k5GRcXV25du0aAPv27aNp06asWrWKlStXEh4eTpMmTZg9e/Zz0xAJIYR4kFEfn61YsYJt27Zx/fp1evbsSUBAAAMGDHjoYx3xZO3btyckJITbt29z6NAh/P39jT5206ZNxMfH884773D58mUsLCyoVauW3PBeAAYMGMCnn36Kh4cHOp2OmTNncu/ePby9vSlbtiz16tV7ZEMrhBDi+WRUU7Rx40bWrVuHm5sblStX5rvvvsPV1VWaoqdgZmZGz549mTJlCl27dsXc3NzoY319fQ1/Dg0NxcbGRhqiAlK6dGnmzp370PZ27do98pj7n1QTQgjx/DGqKTIzM6N06dKGr8uUKZOnf8zFg/r370/Xrl3ZunWrqUsRQgghxP9nVFPk7OzM7NmzSU1NZdu2baxdu5Y2bdoUdG3Flp2dHcePHzdqXx8fnzxtF0IIIcTTMaop8vX1Zd26dTRq1IhNmzbx6quv4u7uXtC1FXtXrlzBz8/voe1OTk6MGjXKBBUVb/dPnT537hxjxozBwcEBAA8PD3r37s3MmTOJiorCzMwMPz+/EvfkhRBClGRGNUXDhg3j66+/lkYon9WsWVPuQykk/5w6ff78ed59913ee+89wz6xsbEcOXKE9evXc+HCBcaNG0dkZKSpShZCCFHIjHokPzU1latXrxZ0LcIIJ0+exNPTEy8vL4YOHcqNGzfydHxJXeYjZ+q0ra0tAGfPnuX3339n0KBBTJw4kZSUFGxtbSlbtiwZ
GRmkpKRQqpRRvzMIIYQoJoz6qZ+YmEjnzp2pWrUqZcqUQSmFTqdj+/btBV2f+Ifg4GAmT55MkyZNiIiIYMmSJXz66adGH1/Slvmwr2xJv8qXqVKlCu3btzcsxVGvXj169OhBs2bNCAsL4z//+Q8jR47EzMyMXr16kZycTFBQkImrF0IIUZiMmmh9+fLlXLfXqlUr3wsqqYyddH39+nXD1Y5Vq1YRHx/PuHHjHvva/1zmI+5Wav6fQBFlX9mSpO9nGhr5CxcuUKNGDcaPH0+lSpUAuHTpEitWrKBVq1acOXOGkSNHkpqaytSpU5kwYQJVqlQx8VkIIYR4FsbeH2rUlaI///wz1+3SFOWfnEnXzs7Oj510ndMQHT58mJUrV7Jq1aonvraPj4/haTVteQuX/Cv8eTB0k+GPXl5eTJkyBR8fH2bNmkXz5s05ceIEbdq0oWHDhqSnp+Pk5IRer6dixYrUq1cPR0dHExZfsEriGP/cSA4ayUEjOWhKYg5GNUUHDhww/DkzM5OoqChat25Nv379CqywkiYvk67/+9//EhYWxldffZXnqxiyzIfmvffeY8aMGVhYWGBjY0NQUBCWlpYcPnwYd3d39Ho9ffr0KdYNkRBCiAcZ1RTNnDnzga9v377N2LFjC6SgksrYSdebN29m7dq1hIeHGz7+EcbLedrv9u3bREREPPT9adOmFXZJQgghioinerymXLlyj7zPSDy9J0261uv1BAcHY2dnZ/g4TGYaCSGEEPnDqKbIy8sLnU4HgFKKS5cuyZpbBeBJk67Nzc05ePBgIVYkhBBClBxGNUX3Lymh0+moXLky9evXL7CiSjqZdJ3/7p9mXa9ePQC+//57Vq5cydq1awH46quv+PHHH7G2tub999+nU6dOpixZCCFEITOqKdq6dSuTJ09+YJufnx+zZ88ukKJKOpl0nb/+Oc0atInWW7ZsIWcixf/+9z9++OEH1q9fD4C7uztt2rTB0tLSJDULIYQofI9tiiZNmkRcXBwxMTGcOnXKsD0rK4vk5OQCL0487OTJkwQGBmJubo6DgwPBwcGYmRk1mBwomROtc6ZZ5wxuvHXrFhEREUyfPt3Q7J85cwZnZ2dDPi+88AL/+9//eOmll0xWtxBCiML12KZo5MiRXL58meDgYLy9vQ3bzc3NDR9BiMK1aNEiPvroI1599VU+/vhjfv/9dzp37mz08SVlorV9ZUu+HepCZGTkA9Oss7OzmTRpEl5eXlhZWRn2b9SoEV999RUpKSlkZmZy5MgRBg4caMIzEEIIUdiMmmgN2iPMqampKKXQ6/VcunSJV155paDrKzGMnWi9aNEi7O3teeONNxg5ciSenp5PvOm9JE60tq9sia+LFZMmTQIwTLNOTU2lWrVqVK1alczMTC5fvsyrr77K4MGD2bFjBzt37qRGjRqkpaXx5ptv8sILL5j4TIQQQjwro4dQKiMsXLhQtWzZUjVv3lx16NBBNW7cWA0YMMCYQ4WRfv/9dzVhwgSllFJTpkxRv/32W677ff/998rJyUn16NFDDRgwQKWlpeXpfY4dO/bMtT6v3n77bXX69GmllFKHDh1ScXFxytXVVSml1M2bN9WXX36plFIqKSlJDRgwQGVlZZms1sJy6NAhU5dQJEgOGslBIzloSmIORt2MsnHjRnbu3Env3r0JDw8nLCyMypUrP0vTJv6hffv2HDt2zDDR+lFXf4KDg1m1ahU///wz/fr1Y9asWXl6H5lonbvKlStz6dIl+vfvz7Bhw/D19X3kAE0hhBDFk1FPn9na2mJtbU2DBg2IjY2le/fuzJ07t6BrK1GMnWhdsWJFrK2tAe2/y+HDhwuzzOfaP5/oq127NuvWrQO0j9dkmrUQQpRsRjVF1tbWbNq0iaZNm7Jy5UpsbW1JS0sr6NpKnCdNtAaYPn06Y8eOpVSpUlhYWBAUFFSIFQohhBDFl1FNUXBwMD/++CP9+vVjx44dBAQEMGbMmIKurcR50kRr
gNatW+e6ZpcQQgghno1RTVH16tVxd3cnNjYWX19f0tLSKFeuXEHXVmLJRGshhBCi8BnVFO3bt4+AgHAeuAEAACAASURBVAD0ej1r166lT58+fPbZZ7Rr166g6yuRZKJ1/rh/aQ+lFJMnT0YpRePGjXnttdcAWL58OT/++CMAr7766gPzuIQQQpQsRj19Nm/ePFavXk2FChWoVq0aK1euZM6cOQVdmygAJWWi9T+X9pg3bx7jxo0jIiKCtLQ0oqKiiIuLY8uWLURERLB27Vp2795NbGysiSsXQghhKkZdKcrOzqZatWqGr2Ux2MIzY8YM6tati4eHh2FbYmIi7u7ufP/993lucor7ROucSdb/XNojNDQUc3NzMjIySEhIoEWLFtSoUYOlS5canvTLysoqMU2jEEKIhxnVFNWoUYMdO3ag0+lISkpi1apV1KxZs6BrK9ESExPx9fXl/PnzDB061LD9jz/+YO7cudy4ccPo1/rnROvibuHChYb73pKTkzl+/Di3b98mISGBGTNmUK5cOezs7IiOjgbg7NmzrF69mmrVqpGYmEhiYqKJz6BwRUVFmbqEIkFy0EgOGslBU1xyMHai9WOX+YiPj6d69ercvHmT4OBg9u7di1IKFxcX/P39sbW1zbeCSwpjl/OIi4sjMTGRXbt2YWNjY7hStGfPHl588UX69+/PTz/9lOcrGzExMTRr1ixfzqWoGjRoEDqdDp1Ox8mTJ3FwcCAsLMxwtXP9+vVs3bqVpUuXkp6ezsSJE7GysjIstFuSREVFGT/+vhiTHDSSg0Zy0JTEHB57T9GIESMAqFq1Ks2aNWP//v0cOHCAhQsXSkP0lFxdXdm4cSOgTQp3c3PLdT97e3tatGjx0Pa2bds+0zTxkjDRetWqVaxcuZLw8HCaNGnC7NmzmTx5MufPnwfAysoK3f9r797jcj7cx4+/7rtSSVSMRM42YWwOZUM+tgyRcyhlJjOntmaoqDQlI9OmTTv5fGyZYwzbHjsybM4yozYzJB1MqFA63+/fH/ev+1uk1XTSfT0fjz3W/e59uN6X+zHX3ofrUqlQFIU5c+bwxBNPsGzZMr0riIQQQpRWblFU8iLSl19+We3B6IOKjvMQVWvmzJn4+fnh6enJrl27mDRpEj/++CPHjx/n559/xtPTE09PT3799dfaDlUIIUQtKfeZIpVKpfu5nLtsohIqOs5DVI2SrQ1KNr0svix89uzZ2ghLCCFEHVShB62hdIEkHk5FxnkIIYQQomaVWxT99ddfPP/884D2oevinxVFQaVSsXfv3uqPsB6qyDiPYt7e3mUu37dvX1WGJIQQQui9cosiuZJRvWScR9Ur2cXa0NAQPz8/VCoVnTt3ZunSpajVaiIiIvjhhx9o1KgRAQEB9OjRo7bDFkIIUQeUWxS1atWqpuLQSzLOo2rd28V6xYoV+Pj44ODgQFBQEHv37qVVq1acPn2aZcuWYW1tzZw5c9izZ08tRy6EEKIuqNCYD1F3xMfHM2HCBNzd3QkJCUGj0VRq+/rcsbm4i3Vxu4j4+Hjs7e0BcHR05PDhw3Tt2pX169ejUqlITU2lWbNmtRmyEEKIOqTCD1qLuiEwMJCAgAB69epFREQEX375JaNHj67w9vVtzEfxWI+dO3diZWXFwIEDdaM9ip99A21vojt37gBgaGjI1q1b+eGHHwgMDKy12IUQQtQt5Xa0FjWnop2u+/fvz6FDhwA4cOAAe/fuZdmyZeXu+94xH0kZOVV/ArXE1tKURQ5mLFmyBNC+JZmYmIi1tTWXL19m48aNAJw8eZKzZ8/y0ksv6bbNyckhKCiIBQsW0KJFi1qJXwghRPWraGduuVJURxR3ura3t//HTtfHjx/H3t6en376iZycfy5wvL29dW+xacd8OFRp7HXB7t27dT97enoSHBxMeHg4hYWFODg4sHv3bpydncnPz+f7779n5MiRODg4YG5uzpNPPomtrW0tRl979LGNf1kkD1qSBy3J
g5Y+5kGeKaojKtrpOiwsjA8//JCZM2fStGnTSo/80IcxH8V8fX2JjIxk0qRJFBQUMHToUOzt7dFoNAQHBzNlyhSmTJmitwWREEKI0uRKUR1R0U7XBw4cICwsjBYtWhASEiJjQspQ8o2+4ttnJb355pt6+X9AQgghyidFUR1SkU7Xbdu2ZebMmZiamuLg4MCgQYNqMEIhhBCi/pKiqA6pSKfr5557jueee66GIhJCCCH0hxRFdZB0uq64oqIiAgICSEhIwMDAgBUrVpCdnc3SpUsxMDCgXbt2LF++HLVazYEDB3j//fcBeOyxx+jVq5fM9BNCCKEjRVEdJJ2uK+6nn34CYMuWLRw7dowVK1agVquZO3cugwYN4o033mD//v3Y29sTHh7OZ599hpWVFcHBwWRkZGBlZVXLZyCEEKKukKLoERMfH8+sWbNo164dAG5ubjg7O1d4+/rW0drJyYn//Oc/ALoO1S1atCAzMxNFUcjOzsbQ0JBff/2Vxx9/nJUrV5KUlETfvn2lIBJCCFGKFEWPmN9//52XXnqJ6dOn/6vt60tH6+JO1qDtUO3r68sPP/zA2rVryczMZNmyZURFRWFubo6DgwPfffcdx44dY9euXTRs2JCxY8cyZswY2rdvX8tnIoQQoq6QjtZ1REU7Wi9dupSEhASKiopo27YtixcvplGjRuXuuz52tC7uZF2y71JmZiZBQUHk5eURGBhI69at+f7770lJSaFXr158//33LFy4EIBPP/2UJ554gn79+tXWKQghhKgh0tH6EVPRjtY9evTA1dWV7t27ExUVxfvvv1/mQ9kl1eeO1rt27eLatWu88sorZGVlYWxsjLm5Ofb29rRs2ZL09HRu3LjBmDFj2LhxI+3bt6dx48ZcuHCBefPm8fjjj9f2KdQq6dekJXnQkjxoSR609DEPUhTVEQMHDiQ8PFzX0TogIKDM9YYMGULjxo11P4eEhFTqOPWto/ULL7yAv78/U6ZMobCwkMWLF2NhYcHrr7+OoaEhRkZGhISEYGVlxRtvvMGMGTMA7Zt8+l4QCSGEKE2Kojqioh2tvby8CAwMpEePHhw5coRu3brVcKR1S8OGDXn33XfvW75ly5b7lo0YMYIRI0YA2v8DEkIIIUqSoqgOqUhH6+DgYEJCQjAyMqJZs2aVvlIkhBBCiLJJUVSHVKSjdbdu3cq8CiKEEEKIhyNFUR0kHa2FEEKImidFUR0kHa0rpqwRHxEREdy4cQOAlJQUevbsSUREBKGhoZw6dQozMzMWLFhQy5ELIYSoi+pEUbR161bGjRvHtWvXGDVqlO7h4by8PN2DtE2aNHno4+zcuZMmTZrw/PPPV2q7vLw8hg8fzr59+/Dz8yM+Ph4LCwsKCwuxtLTE398fW1vbh46vJtSnjtZljfgo7u1069Ytpk6dir+/Pz/99BMJCQnExMSQmZnJjBkzWLJkSW2GLoQQog6qE0XRhx9+yJgxYwDo1KlTqaskb7/9NjExMXh5eT30ccaNG/fQ+wBYuHAhjo6OAJw8eRIfHx927NhRJfuubvWho3VxN+uyRnwUi4yMxMPDg+bNm7N7924GDhyIWq3GysoKAwMDMjMzayl6IYQQdVW1FUU7d+7kwIED5ObmcuXKFV5++WW6dOlCSEgIBgYGGBsbExISwqFDh7h+/Tqvv/46ixcvLrUPRVG4evUqbdq0ASA6OpqvvvoKlUqFs7MzU6dOxc/PT7fe3bt3WblyJR07duTtt98mLi6O7OxsOnbsyIoVK4iMjKRZs2Z06NCB1atXY2RkxMSJE7GxsSEiIgIDAwNsbW1ZtmwZ+fn5LFiwgNu3b+uOX5Y+ffpgZGREYmIieXl5vPXWW2g0Gm7fvk1AQAC9evViyJAhPP300yQmJtKvXz/u3LnDmTNnaN++PeHh4Zw/f77M7bZv387nn39OkyZNMDIywtnZGRcXF5YuXUpiYiIajQYfHx8cHMpvxnhvR+v6IC4uTtdzKSoqipMn
T/Laa68RGxvLrVu32LdvH0OHDiU2NhZDQ0O++uor7OzsuHnzJn/++Sd5eXnyWv7/J3nQkjxoSR60JA9a9SUPFW5CqVSTHTt2KNOnT1cURVESEhKUoUOHKmPHjlV+//13RVEU5YcfflC8vb0VRVGUwYMHK7m5uUpSUpLy9NNPKx4eHsrIkSOVIUOGKGvXrlUKCgqUv/76S5k8ebJSWFioFBUVKZ6ensrFixcVX19fJTIyUlEURdm/f7/yyiuvKHfu3FE++ugjRVEUpaioSBk2bJjy999/K2vXrlU2bdqkHD16VHFxcVEURVE0Go3ywgsvKDdu3FAURVEiIiKUrVu3KtHR0cqaNWsURVGU06dPK4MHD1YURVF8fX2VAwcOlDrXOXPmKL/++qvy9ddfK+fOnVMURVH27NmjLFmyRFEURbGzs1NSUlKU/Px85amnnlL++usvRaPRKIMHD1Zu3bpV5nY3b95UXnjhBeXu3btKYWGh4u7uruzYsUP5/PPPlVWrVimKoijp6emKs7Nzpf5czp49W6n1HxVpaWnKf/7zHyU7O1vZuHGjsm7dulK/X7duneLm5qa8+eabypQpU+77M9RXJ0+erO0Q6gTJg5bkQUvyoKWPeajW22ddunQBtK+a5+fnk5WVhZ2dHaB9k+rtt9++b5vi22e5ubnMmjWLpk2bYmhoyPnz50lNTWXatGmA9pmRK1euAOjmVz399NOEhYVhbGxMeno68+fPp2HDhty9e5eCgoJSxykeBJqenk5aWho+Pj4A5Obm0r9/fzIyMhg4cCAAPXv2xNDwwalKTU3F2tqawsJC1q1bh4mJCdnZ2bqZZBYWFtjY2ADaZoOdOnUCwNzcnLy8PJo3b37fdleuXKFjx46Ymprqzg3g/PnzxMbGcubMGQAKCwvJyMjA0tKyQn8m9amjdckRH6ampqhUKgwMDDhy5AizZ8/WrZeQkEDTpk3ZtGkTV69eZdGiRZiZmdVi5EIIIeqiai2KVCpVqc/Nmzfn3LlzdOnShRMnTtCuXTvdehqNptS6JiYmrF69mjFjxtCrVy86dOhAp06d+OSTT1CpVGzYsIHHH3+cb7/9lvj4ePr06cOpU6fo3LkzBw8e5OrVq7zzzjukp6fzww8/oNwz91atVgNgaWmJtbU169atw9zcnL1799KwYUPOnz/P6dOncXJy4vfff6ewsLDMczx06BAmJiZYW1sze/ZsVq9eTceOHVm7di0pKSll5uFey5cvv2+7Nm3acOnSJXJzc2nQoAFnzpyhQ4cOdOjQAWtra2bNmkVubi5RUVFV8hD6o6isER/GxsYkJCSUevDdxsaGn3/+mZiYGIyNjQkKCuL27du1GLkQQoi6qEYftA4NDSUkJARFUTAwMCAsLAzQPpczc+ZM3edizZo1Y9GiRQQFBbFlyxaeeeYZ3NzcyM/Pp0ePHrRo0QKAgwcPsnfvXjQaDStWrMDExIR169YxceJEGjRogK2tLWlpaWXGpFarWbJkCTNnzkRRFMzMzFi1ahV9+/bF398fNzc3OnTogJGRkW6b8PBwPv74Y9RqNWZmZrzzzjsAjBo1ijlz5tC0aVOsra3JyMioUF7K2s7KyoqXX34Zd3d3LCwsyMvLw9DQkMmTJxMQEICHhwdZWVm4u7vrCjx986ARH19//XWpz8bGxkRGRpZaVl/ukwshhKg6KuXeSyiPGD8/P5ydnXVvg9UXhYWFfPzxx7rbQFOmTMHHx4e+ffs+1H71cepxWSQPWpIHLcmDluRBS/KgpY95qBOv5Iv7GRoakpOTw9ixYzEyMqJHjx706dOntsMSQggh6q1Hvih66623ajuEajN//nzmz59f22FUu4KCAhYvXkxKSgr5+fnMnj2br776qszO1Dt37mTz5s0UFRXx/PPPM3fu3FqOXgghRH3xyBdFonLqYkfrPXv2YGFhQXh4OBkZGYwdO5b9+/cDpTtTX7lyhc2bNxMdHU2DBg1Yu3YtBQUFpZ73EkII
If4tKYrquLCwMNq3b4+bmxsAn3/+OTt37kSlUjF37lwGDx5cqf3VtY7WtpamRE0extChQ3XLDAwMdD+X7Ey9ZcsWunfvjq+vL9evX2fWrFlSEAkhhKgyj/yD1vVVeno6ixYt4vLly3h5eeHm5kZ6ejqenp7s2rWLvLw8RowYwf79+//xlf97O1onZeTUxClUiK2lKYsczMjLyyMnJ4e3336bwYMH079/f27dukVoaCgrV65ErVaza9cujhw5QnBwMPn5+QQHBxMaGio9h4QQQpSrog+My5WiGjZv3jymTp2Kvb09Z86cISoqSjfEtKTs7Gy8vb05ePCgbpmVlRW7d+/G0NCQlJQUGjdu/I8FEYC3tzfe3t6AdjxG9+7ljwWpDVevXmXu3LlMmTKFCRMmANqrYq6urro37s6fP0+DBg0YMGAAANu3b8fCwoIePXpU+nj6+FZFWSQPWpIHLcmDluRBSx/zoJ8NbmqRq6srX3zxBQBffPEFEydOLHM9W1tbevbsed9yQ0NDNm7cyKRJk0rdcqqoutjR+saNG0yfPp2FCxfqCiKAI0eOlGq10KtXL44fP05eXh53797l4sWL5c6lE0IIISpDiqIaNnDgQM6ePUtmZiYnT578V/2VPDw8+Pnnnzlx4gRHjx6thihr1gcffMDt27dZt24dnp6eeHp6kpube19n6ieeeILx48fj5ubGlClTmDNnDhYWFrUYuRBCiPpEbp/VMLVazbBhwwgODsbJyanUQ8X/5NKlS6xZs4bIyEiMjIxo0KBBvehmHRAQQEBAwH3L7+1MDTBt2jTd/DshhBCiKklRVAvGjx+Pk5MT3333XaW269ChA126dGHSpEmoVCoGDhyIvb19NUUphBBC6BcpimpBy5YtiY+Pr9C6xQ9IF5s3bx7z5s2rjrCEEEIIvSZFUS1KTU3F19f3vuV9+/bl1VdfrYWIql5Z3ao7deqEn58fKpWKzp07s3TpUtRqNStWrCA2Nha1Wo2vr6/evfUghBCidklRVENOnDiBubk5Xbp04bnnnqNly5ao1Wry8vLo1q0bfn5+5Xab3rhxIx4eHjUYcdUoq1t1ly5d8PHxwcHBgaCgIPbu3YutrS2//vor27dvJzExkfnz57Nz587aDl8IIYQeefSf0n1E7Nixg7S0NN3n//73v0RHR7Nt2zaaN29OREREuduX1cvo36jpMR/Dhg3jtdde0302MDAgPj5e9yyUo6Mjhw8fpnnz5piYmJCfn09WVhaGhlKvCyGEqFnyN085cnNz8ff3JzU1lYKCAoYOHcrBgwfRaDS8+uqrZGZmsmHDBtRqNb1792bBggX8/fffBAcHk5eXR2ZmJnPnzsXa2pqff/6Z+Ph4OnXqdN9xXnrpJZydnfHz8+Pbb7/l888/1/3u3XffZevWrdy6dYvg4GCWLFnC0qVLSUxMRKPR6K64VFRNjvmwtTTlMy9tbFlZWbz66qv4+PiwcuVKXdNJMzMz7ty5g6GhIWq1muHDh3Pnzh1CQkJqJEYhhBCimBRF5diyZQutWrUiIiKC8+fPc/jwYRo3bkxUVBSZmZm4u7uzY8cOTE1NWbhwIYcOHUKlUvHSSy/h4ODAqVOniIyM5H//+x8DBw7E2dkZGxub+45jYmKia6p4+fJlPvroI0xNTQkKCuKXX35h9uzZbNy4keDgYDZt2oSlpSVhYWFkZGTg4eFR5qvrJd075qMmxcXFkZqaypo1axgyZAg2NjYUFRURGxsLwJkzZ8jNzSUyMhK1Ws3KlSvJycnhzTffxMDAACsrq2qLrTgGfSd50JI8aEketCQPWvUlDzLmowpcunRJ11zx8ccfJy4ujvbt2wNw5coV0tPTmTlzJqAdy5GUlETv3r2JiooiJiYGlUpFYWHhPx4nKytLN7+radOm+Pr6YmZmxqVLl3jqqadKrXv+/HliY2M5c+YMAIWFhWRkZGBpafnA/dfmmI8bN26wcOFCli5dyjPPPANAz549KSws
xMHBgd27d+Ps7Ex+fj55eXn07duXoqIimjRpQseOHenQoUO1xKWP7evLInnQkjxoSR60JA9a+pgHKYrK0bFjR86ePYuTkxNJSUmsWbOGMWPGANC6dWtatmzJf//7X4yMjNi5cyd2dna8++67uLq6MmjQIHbs2KEb6aFSqXjQ7N2PP/5Yd9to7dq17N+/H9DeVivepvjfHTp0wNramlmzZpGbm0tUVBRNmjSp8DnV9JiPkt2q161bB8CSJUsIDQ1lzZo1dOjQQTeu5NSpU0yePJmioiJcXFyqrSASQgghyiJFUTkmT57M4sWL8fDwoKioiJdeeomMjAxAO5x12rRpeHp6UlRURKtWrRg+fDjDhg1j+fLlfPjhh7Rs2VK3fs+ePVm9ejWtW7cGYPr06ajVajQaDXZ2dixatAgjIyN69erF2LFjadiwIY0bN9Y9nN2xY0cWLFhAWFgYAQEBeHh4kJWVhbu7e53uav2gbtUbN268b9myZctqIiQhhBCiTCrlQZcvRL2kj5dDyyJ50JI8aEketCQPWpIHLX3MQ929xCCEEEIIUYOkKBJCCCGEQIoiUc0KCgpYuHAh7u7uTJgwgb1795KYmIibmxvu7u4sXboUjUajWz8nJ4fRo0dz8ODBWoxaCCGEPpIHrR8xr7/+Ojdu3AAgJSWFnj17/mM37JJquqN1Rcd8DBkyBNA+bF3c2FEIIYSoSVIUPWKKC6Bbt24xdepU/P39K7V9TXW0Lu5mPWzYMN0r91D2mI9Dhw4xZMgQ1q9fz9NPP/3A1gVCCCFEdZK3z+qIefPmMXXqVOzt7Tlz5gxRUVHlzjsLDQ3liSeewNXV9R/3fW9H66SMnCqL+0FsLU1Z5GCm64uUk5PD22+/zeDBg/n88891PYvi4+PZv38/gwYN4ujRo8yYMYMPPviAZ555hp49e1Z7nEIIIeq/ir5FJ0VRHXHgwAG+/fZbVqxYwZtvvomjoyODBw8uc92bN28ydepU9uzZg4GBQaWOo+1o3b0qQq6wq1evMnfuXN1zRY6Ojrpnhn788UcOHz7MrVu3SElJwcjIiEuXLmFlZcWqVauws7Orlpj08VXTskgetCQPWpIHLcmDlj7mQW6f1REDBw4kPDyczMxMTp48WWbDw2LffvstI0eOrHRBBDXf0frGjRtMnz6doKAg3ZiPrl27cuzYMRwcHDh48CD9+vXD2dlZt42fnx/Ozs7VVhAJIYQQZZG3z+oItVrNsGHDCA4OxsnJqdyC58iRI7qZbHVdyTEfnp6eeHp64uPjQ2RkJJMmTaKgoKDUM0dCCCFEbZErRXXI+PHjcXJy4rvvvit3vYSEBGxtbWsoqodTmTEfxd56663qDEkIIYQokxRFdUjLli2Jj4//x/W+/vrrGohGCCGE0C9SFNVBqamp+Pr63re8b9++vPrqq7UQkRBCCFH/SVFUB9nY2BAdHV3bYTy0goICFi9eTEpKCvn5+cyePZtOnTrh5+eHSqWic+fOLF26FLVazcqVKzl16hSFhYVMmjSJiRMn1nb4Qggh9IwURdXgo48+4vDhw6jValQqFa+//nqZr8EnJyczf/58tm3bVuZ+Sv7ez8+P+Ph4LCwsKCwsxNLSEn9//0o/W1STHa0r2s3a3NycK1eusHXrVvLz8xkxYgRDhw6lSZMmNRarEEIIIUVRFbtw4QL79u1j8+bNqFQq/vjjD3x9fdmzZ89D73vhwoW6t85OnjyJj48PO3bsqNQ+arKjddTkinWz9vPzK/X6fVFREYaG8tUUQghRs+RvnipmZWVFamoqMTExODo6YmdnR0xMDMePH9d1lc7NzWXlypUYGRnptjt+/DgREREYGBhga2vLsmXLyj1Onz59MDIyIjExkbZt25a77r0drWtKQkICeXl5um7Wo0eP5vPPP+fUqVOA9tmpxMRE4uLiACgsLCQqKooBAwZw7ty5ao8vNja22o/xKJA8aEketCQPWpIHrfqSh4o2oZSiqIpZWVkR
FRXFxo0bef/99zExMdENcQ0PD6dFixZ88MEHfPvtt7i4uACgKAqBgYFs2rSJpk2b8s477/DFF1/w7LPPlnuspk2bkpGR8Y9Fkbe3N97e3kBxR2uHqjnZCijuZj1lyhQmTJhATEyM7suZkZFBu3bt6N27N7du3eLVV1/F3t6euXPnVntc+tiptSySBy3Jg5bkQUvyoKWPeZCiqIolJibSqFEjVqxYAcDZs2eZOXMmixYtYvny5TRs2JBr167Rq1cv3Tbp6emkpaXh4+MDaK8k9e/f/x+PlZqairW1daXiq8mO1hXtZp2bm8u0adN46aWXGDVqVI3FJ4QQQpQkRVEV+/PPP9m8eTMffPABxsbGtG/fHnNzc8LCwvjpp59o1KgRvr6+pSbBW1paYm1tzbp16zA3N2fv3r00bNiw3OMcOnQIExOTShdFNalkN+viAbBLliwhNDSUNWvW0KFDB4YOHUp0dDRJSUls376d7du3AxAWFvbINKgUQghRP0hRVMVeeOEFLl68iKurKw0bNkRRFBYtWsSJEyeYOHEijRs3plmzZqSlpem2UavVLFmyhJkzZ6IoCmZmZqxatYqcnNLT7MPDw/n4449Rq9WYmZnxzjvv1PTpVUpFu1lPmzaNadOm1VBUQgghRNlUSslLFqLe08d7xGWRPGhJHrQkD1qSBy3Jg5Y+5kEGwgohhBBCILfPRDX57bffWL16NdHR0bq37wBSUlLo2bMnERERhIaGcurUKczMzFiwYAE9e/as5aiFEELoMymKRJX7+OOP2bNnD6ampgBEREQAcOvWLaZOnYq/vz8//fQTCQkJxMTEkJmZyYwZM9i5c2dthi2EEELPye2zR9SXX37JpEmTKr1dTYz5aNOmDZGRkfctj4yMxMPDg+bNm3PhwgUGDhyIWq3GysoKAwMDrl+/Xu2xCSGEEA8iV4oeQX/88QcxMTH8m2fkq3PMh62lKZ95OTB06FCSk5NL/e7mzZscOXIEf39/AOzs7Pjf//7HlClTBqUNVAAAHc9JREFU+Pvvv7lw4cJ9b9sJIYQQNUmKojpi3rx5TJ06FXt7e86cOUNUVBRRUVH3rZeRkcHq1atZvHgxgYGBFdp3TY75iIuLIy8vj+vXr5Odna1rEf/DDz/Qq1cvTp8+DYCpqSk2NjaMHz+eNm3a0KZNGy5fvlyjV4vqS/v6hyV50JI8aEketCQPWvUlDxV+i04RdcL+/fsVPz8/RVEUJTg4WNm3b9996xQWFiqzZ89WLly4oCQlJSmurq6VPs7Zs2cfOtaKuDe+uXPnKnFxcbrPly5dUrZu3aooiqKkpqYqHh4eNRJXsZMnT9bo8eoqyYOW5EFL8qAledDSxzzIM0V1xMCBAzl79iyZmZmcPHkSR0fH+9aJj48nMTGR4OBg5s+fz4ULF1i+fHmljlOTYz5KSkhIKNWh2sbGhp9//pmJEyeyaNEigoKCaiUuIYQQopjcPqsj1Go1w4YNIzg4GCcnJwwMDO5bp0ePHnz99dcAJCcnM3/+fJYsWVLToVZI69at2bZtm+5zcdzFjI2Ny3wYWwghhKgtUhTVIePHj8fJyYnvvvuutkMRQggh9I4URXVIy5YtiY+Pr9C6916JEUIIIcTDkaKoDkpNTcXX1/e+5X379uXVV1+thYgqrmQn65s3bxIQEMDt27cpKipi1apVtGnThm3btrFlyxYMDQ2ZPXs2gwcPru2whRBCCCmKasLOnTu5dOkSCxYsqND6NjY2REdHV2jd/v37c+jQoYcJr8rc28k6PDwcFxcXnJ2dOXr0KJcuXcLU1JTo6Gh27NhBXl4e7u7u9O/fnwYNGtRy9EIIIfSdvH2mZ6qzo/W9naxPnTrFtWvXmDZtGl9++aWuB9PTTz9NgwYNMDc3p02bNpw7d67aYhJCCCEqSq4U1ZDTp0/z4osvkpWVhbe3N6tXr6Zdu3Y0aNCARYsWERwcTF5eHpmZmcydOxcnJydcXFywt7fnzz//RKVS
sW7dOho2bEhgYCAXLlzA1taW/Pz8SsVR1R2ti7tYA/d1sk5JSaFx48Zs2LCB9957j48//ph27dphbm6uW8fMzIysrKwqi0cIIYT4t6QoqiGmpqZ89NFHpKen4+rqikajYc6cOXTt2pXDhw/z0ksv4eDgwKlTp4iMjMTJyYns7GxGjBhBYGAgb7zxBgcPHqRhw4bk5eWxbds2UlNTK/SmWnV3tC7uYg2U6mRtZmZG06ZNiY2NpWXLlmzduhVTU1MuX76s65KakpJCSkpKrXRNrS+dWh+W5EFL8qAledCSPGjVlzxUtKO1FEU1pHfv3qhUKpo2bYq5uTmJiYm0b98egMcee4yoqChiYmJQqVQUFhbqtuvatSugfTMtLy+PlJQUevToAWifPWrZsuU/Htvb2xtvb29AW8B07+5Q1aenk5ycjJmZGb1796Zfv35kZmbyn//8h7i4OHr37s3YsWP58ssv6d69O/n5+dy8eZNRo0bVyKDakmJjYyve9r0ekzxoSR60JA9akgctfcyDPFNUQ86ePQtor6TcvXsXS0tL1Gpt+t99911Gjx5NeHg4Dg4OpQa9qlSqUvvp0KGDbn7YtWvXuHbtWqXiqMmO1r6+vuzevZvJkyfz888/M2vWLB577DE8PT1xd3fnxRdf5PXXX6/xgkgIIYQoi1wpqiG5ublMnTqVu3fvsmzZslKdqIcNG8by5cv58MMPadmyJRkZGQ/cj5OTE7Gxsbi6umJjY4OlpWVNhF9hJfsntWrViv/973/3rTNx4kQmTpxY06EJIYQQ5ZKiqAaMGzeOcePGlVq2b98+3c8jR45k5MiR921Xcp2Sr/OX1cNICCGEEA9Hbp8JIYQQQiBFkRBCCCEEIEWReAi//fYbnp6eAPzxxx+4u7vj6emJl5cXN27c0K2n0WiYMWMGmzdvrq1QhRBCiH8kzxQ9QGVHcxQrb+zGiy++iEaj4dKlS1hZWWFhYcGzzz7L7NmzqyLkCqmqN73uHemxfPlyAgMDsbOzY8uWLXz88cf4+/sD8M4773Dr1q0qOa4QQghRXaQoqkGffvopAH5+fjg7O+Po6FjjMTxsR+viDtbFIz0WLVoEwJo1a2jevDkARUVFuuLr22+/RaVS1cq5CiGEEJUhRVE57h3NYWRkxDvvvIOxsTEWFhaEhYVhZmZ239gNjUbD0KFD2b59OxYWFmzatIm7d+8yY8aMMo8zefJkQkJC6Ny5MwcOHGD//v1YWVlx6dIlbt68ye3btwkICKBPnz588803bNiwAbVaTe/evSt0JauqO1rHxcXRrFkzzp07p+teDZCUlMT58+f55JNPCAoKYteuXcTExPDaa6+xc+dO7t69W6e6o9alWGqT5EFL8qAledCSPGjVlzxUuAmlIsq0Y8cOZcaMGYpGo1Fu3LihDB48WBk8eLDy999/K4qiKBs2bFDeeustZd++fcr8+fMVRVGUlJQUpVu3boqiKMq7776rbNy4UVEURZk0aZJy/fp13b59fX2VAwcO6D7HxMQoK1euVBRFUby9vZW4uDhl7dq1ip+fn6IoinL+/HnFxcVFycjIUIYPH67cvXtXURRFWbBggfLLL79U6rzOnj37b9JRpqSkJMXV1VX3+euvv1ZGjhypXLlyRVEURVm5cqUyYcIExcPDQxk8eLDywgsvlDrv2nTy5MnaDqFOkDxoSR60JA9akgctfcyDXCkqR8nRHMXPzrRo0QKAvn37smbNGiwtLcscuzFhwgRef/11+vbtS7NmzWjWrNkDj+Ps7MzYsWPx8vLi77//plu3buzbt49+/foB0LlzZ27cuMGVK1dIT09n5syZAGRnZ5OUlFSpc6qujta7d+9m69atREdHY2FhAaC7tQbaq1XNmjWT22hCCCHqLHn7rBwlR3Pk5eWRk5NDWloaAMePH6ddu3YPHLthY2ODubk5H3zwARMmTCj3OKampjg4OLB8+XJGjx6tWx4fHw/A+fPnadGiBa1bt6Zly5b8
97//JTo6Gg8PD3r27Fnl511ZRUVFLF++nOzsbLy9vfH09GTt2rW1HZYQQghRKXKlqBwlR3OEhISgKAre3t6oVCqaNGnCihUrsLKyeuDYjYkTJxIaGkp4ePg/HmvixIm4ubkRHBysW/bHH3/w4osvkpOTQ0hICFZWVkybNg1PT0+Kiopo1aoVw4cPr45Tr5CSIz2OHz9e7rrFA2mFEEKIukqKogcoazQHwLPPPnvfsgeN3SgsLGT8+PEYGBiUWv7WW2/dt25RURHDhg2jcePGumXOzs64ubmVWm/06NGlriYJIYQQompIUVRN1qxZw8mTJ1m3bt0/rrtx40Z27Nght5yEEEKIWiRFUTWZP39+hdf18PDAw8Oj1LK6ervpt99+Y/Xq1URHR5OYmIifnx8qlYrOnTuzdOlS1Go1ERERHD58GJVKRUBAgO5BdCGEEKIuk6KoEoqKipg5cyZ3797lgw8+oEmTJv+4TXFn7E6dOrFjxw7y8vK4cOEC3bp1A2D16tW6N9rqunu7WK9YsQIfHx8cHBwICgpi7969tGrVitOnT7Nt2zZSUlKYM2cOe/bsqeXIhRBCiH8mb59VwvXr18nIyGDz5s0VKohKGjNmDNHR0axZs4ZOnToRHR1NdHR0jRdEDzPmo7iLdbH4+Hjs7e0BcHR05PDhw3Tt2pX169ejUqlITU0ttxWBEEIIUZfIlaJKCAwM5PLlywQFBWFnZ4ebmxsXL14kODiY6Ohojh8/TkREBAYGBtja2rJs2bJ/3Ocvv/zCtm3bdM8TTZ48mbVr1+Lm5kbPnj25cuUKnTt31r3yvmTJEjIyMgAICAjgiSeeqNQ5/JsxH8WjPYYOHUpycrJuuaIoqFQqAMzMzLhz5w4AhoaGRERE8NlnnxEYGFipYwkhhBC1RYqiSli6dCnz58/nscceu+93iqIQGBjIpk2baNq0Ke+88w5ffPEFhoblp7h///6EhoZy69Ytrl+/jqWlJc2bN+fatWu89tprtG3bltdee40ff/yR3377jX79+uHu7s7ly5fx9/ev0OT5qhjzERcXR15eHtevX9eN9igqKtK1gD9z5gy5ubm6z46OjvTt25egoCBMTEzq5C3C+tK+/mFJHrQkD1qSBy3Jg1Z9yUNFx3xIUVRF0tPTSUtLw8fHB9D2OOrfvz9t2rQpdzuVSsWoUaP46quvSE5O1jV6bNmyJW3btgXg6aefJiEhgfPnz3P06FG++eYbAG7fvl2h2Ly9vXUPbsfFxdG9u8O/OkeA5ORkzMzM6N27Nz179qSwsBAHBwd2796Ns7Mz+fn5fP/99yxdupSCggLMzc158sknsbW1/dfHrA6xsbEVn4VTj0ketCQPWpIHLcmDlj7mQYqif8HY2Jjr168D/9d12tLSEmtra9atW4e5uTl79+6lYcOGXL169R/3N378eBYsWEBOTg5vvPEGoO2Off36dR577DFOnTrF6NGjSU9PZ9SoUbi4uHDz5k22b99e6dircsyHr68vgYGBrFmzhg4dOjB06FAAvv32WyZPnoxGo2HKlCl1riASQgghyiJF0b8wfPhwfHx8OHHiBN27dwdArVazZMkSZs6ciaIomJmZsWrVqgoVRS1atMDMzIynnnpKd7utQYMGhISEcPXqVXr27Mlzzz1Hr169WLJkCdu2bSMrK4t58+ZV63mWpWQX6/bt27Nx48b71nnzzTdrOiwhhBDioUlRVAklC4IdO3bc9/sBAwYwYMCAUsvu7Ypdch8lKYpSakaasbHxfc0cLS0tK9QMUgghhBCVJ6/k17Lc3FzGjRtHly5ddM8QCSGEEKLmyZWiWmZiYsLOnTvvW37o0KFaiOb/jBkzBnNzc0B7dWvEiBGsXr0aU1NTBg4cyJw5c2o1PiGEEKKqSVFUBxw7dowtW7YQERFR26EA//cwdnR0NAAajYbnnnuO6OhobG1tWbBgASdPnqRPnz61GaYQQghRpeT2mZ6pSEfrc+fOkZOTw/Tp05k6
dSqxsbE0btxY9xZZr169OHXqVHWHKoQQQtQouVL0L+Xm5rJo0SLS0tJo2bIlJ06cYP369YSGhgJgYWFBWFgYDRs2JCgoiL///puMjAwcHR3x8fHBz8+PzMxMMjMz8fLyIjExES8vLzIyMnBzc8PV1ZXff/+dkJAQDAwMMDY2JiQkhIyMDHx9fdm2bRvffPMNBw8e5N13361w3OV1tC7uXG1iYoKXlxeurq5cvnyZl19+GY1Gw8WLF2nXrh0HDx6kS5cuVZFGIYQQos5QKYqi1HYQj6JPP/2Ua9eusWjRIi5evMjIkSN58sknCQsLo1OnTmzfvp3k5GRcXV05cuQIrq6u5OXl4ejoyLFjx/Dz86NLly5MmzaNY8eOERoayo4dO9BoNIwePZrNmzczY8YMli9fjp2dHT/++CN79uxh7dq1bNiwgVOnTpGcnMxnn31Go0aNyo313o7WSRk5Za5na2nKIgczsrKyUBSFBg0aANpxIh4eHmzfvh0zMzNatGiBlZUVw4cPr9qkCiGEENVAOlpXs4sXL+Lo6AhAx44dsbKy4uLFi7oePQUFBbRv3x4LCwvOnj3L0aNHadSoEfn5+bp9tG/fXvfzU089pStCOnbsSHJyMmlpadjZ2QHQt29f3n77bUA7H+39999nzpw5/1gQQeU7Wm/atInz588THBzMtWvXUBSFtLQ0Nm3ahKmpKfPmzWPSpEl07ty5oumqc/SxU2tZJA9akgctyYOW5EFLH/MgRdG/9Pjjj/Prr7/i5OTElStXyMjIoGvXrqxcuRIbGxtiY2O5fv06O3fuxNzcnGXLlpGYmMi2bdsovjhXPEwV4Pfff6ewsJD8/HwuXrxImzZtaN68OefOnaNLly6cOHGCdu3aAbBq1Sq8vLzYuXMnTk5OleoYXZGO1hMmTMDf3x83NzdUKhVhYWFcuHABNzc3TExMcHFxeaQLIiGEEKIsUhT9SxMmTMDPz48pU6ZgY2ODsbExwcHB+Pr6UlRUBMDy5cvp2LEj8+fPJzY2FlNTU9q2bUtaWtp9+zM2Nubll1/m9u3beHt7Y2FhQWhoKCEhISiKgoGBAWFhYezdu5fLly8TGBjIU089xYIFC9i4cSNGRkZVdm4NGjTQXZUq1qtXLyZOnFhlxxBCCCHqGnmm6F86deoUd+/eZcCAAVy+fJkZM2bw448/1nZY/0gfL4eWRfKgJXnQkjxoSR60JA9a+pgHuVL0L9na2jJ//nzee+89CgsLCQoKqu2QhBBCCPEQpCj6lx577DFdc0MhhBBCPPqkeaMQQgghBFIUCSGEEEIAUhQJIYQQQgBSFOmdw4cP13YIdYLkQUvyoCV50JI8aEketPQxD/JKvp554okn+PPPP2s7jFonedCSPGhJHrQkD1qSBy19zINcKRJCCCGEQIoiIYQQQggADIKDg4NrOwhRsxwcyh8Iqy8kD1qSBy3Jg5bkQUvyoKVveZBnioQQQgghkNtnQgghhBCAFEVCCCGEEIAURUIIIYQQgBRFQgghhBCAFEVCCCGEEIAURUIIIYQQABjWdgCiZmg0GoKDg/nzzz9p0KABoaGhtG3btrbDqlZjxozB3NwcgNatWzNp0iSWL1+OgYEBAwYMYN68efU6L7/99hurV68mOjqaxMRE/Pz8UKlUdO7cmaVLl6JWq3nvvffYv38/hoaGLF68mB49ejxw3UdVyTzEx8cza9Ys2rVrB4CbmxvOzs71Og8FBQUsXryYlJQU8vPzmT17Np06ddK770NZebC2tta770NRUREBAQEkJCRgYGDAihUrUBRF774PD6QIvfDdd98pvr6+iqIoyq+//qrMmjWrliOqXrm5ucro0aNLLRs1apSSmJioaDQaZcaMGUpcXFy9zctHH32kjBw5UnF1dVUURVFeeeUV5ejRo4qiKEpgYKDy/fffK3FxcYqnp6ei0WiUlJQUZdy4cQ9c91F1bx62bdumrF+/vtQ69T0PMTExSmhoqKIoipKenq4M
GjRIL78PZeVBH78PP/zwg+Ln56coiqIcPXpUmTVrll5+Hx6kHpV3ojyxsbEMHDgQgKeeeoq4uLhajqh6nTt3jpycHKZPn87UqVM5ceIE+fn5tGnTBpVKxYABAzhy5Ei9zUubNm2IjIzUfY6Pj8fe3h4AR0dHDh8+TGxsLAMGDEClUmFjY0NRURHp6ellrvuoujcPcXFx7N+/nylTprB48WKysrLqfR6GDRvGa6+9pvtsYGCgl9+HsvKgj98HJycnQkJCAEhNTaVZs2Z6+X14ECmK9ERWVhaNGjXSfTYwMKCwsLAWI6peJiYmeHl5sX79et588038/f0xNTXV/d7MzIw7d+7U27wMHToUQ8P/uzuuKAoqlQp48LkXLy9r3UfVvXno0aMHixYt4vPPP8fW1pb333+/3ufBzMyMRo0akZWVxauvvoqPj49efh/KyoM+fh8ADA0N8fX1JSQkhKFDh+rl9+FBpCjSE40aNSI7O1v3WaPRlPrLor5p3749o0aNQqVS0b59e8zNzcnMzNT9Pjs7m8aNG+tNXkre83/QuWdnZ2Nubl7muvXFkCFD6N69u+7n33//XS/ycPXqVaZOncro0aNxcXHR2+/DvXnQ1+8DwMqVK/nuu+8IDAwkLy9Pt1yfvg9lkaJIT/Tq1YuDBw8CcPr0aR5//PFajqh6xcTE8NZbbwFw7do1cnJyaNiwIVeuXEFRFH755Rf69OmjN3np2rUrx44dA+DgwYO6c//ll1/QaDSkpqai0WiwsrIqc936wsvLizNnzgBw5MgRunXrVu/zcOPGDaZPn87ChQuZMGECoJ/fh7LyoI/fh127dvHhhx8CYGpqikqlonv37nr3fXgQGQirJ4rfsjp//jyKohAWFkbHjh1rO6xqk5+fj7+/P6mpqahUKhYsWIBarSYsLIyioiIGDBjA66+/Xq/zkpyczPz589m2bRsJCQkEBgZSUFBAhw4dCA0NxcDAgMjISA4ePIhGo8Hf358+ffo8cN1HVck8xMfHExISgpGREc2aNSMkJIRGjRrV6zyEhobyzTff0KFDB92yJUuWEBoaqlffh7Ly4OPjQ3h4uF59H+7evYu/vz83btygsLCQl19+mY4dO+rtfx/uJUWREEIIIQRy+0wIIYQQApCiSAghhBACkKJICCGEEAKQokgIIYQQApCiSAghhBACkKJICPEISU5Opnv37owePbrUP1evXq30vpKSkli8eHGVx3j27FmWLFlS5ft9kDNnzhAeHl5jxxOiPqt/rXuFEPVa8+bN2b1790PvJzU1laSkpCqIqLQnn3ySJ598ssr3+yAXLlzg5s2bNXY8IeozKYqEEPXCjRs3CAoK4u+//0alUvHGG2/w7LPPcu3aNRYvXsydO3dIS0tj7NixvPbaa4SGhpKcnMybb77JsGHDeO+994iOjgbAz88Pe3t77O3tmTFjBpaWlpiYmPDJJ5+watUqjh8/TlFREePGjWPatGml4jh27JhuX56ennTt2pXY2Fjy8vJYsGABn332GRcvXmTatGlMmzaNyMhIUlNTuXjxIhkZGUyaNIkZM2ag0WgICwvjyJEjqFQqRo0axcyZMzl27Bjh4eFoNBpatGjBH3/8wd27d4mKisLT05PFixdz7do10tLSeOaZZ1i+fDnHjx/nww8/xMTEhIsXL/LEE0+wevVqGjRowIYNG9i8eTMGBgYMHjyYhQsXPjCXQtR3UhQJIR4paWlpjB49WvfZxcWFGTNmsHz5csaPH8/zzz9PWloa7u7u7Nq1i6+++oqRI0cyduxY7ty5w6BBg/D09CQgIID33nuPpUuX6sYWlCUhIYFPPvmE1q1bs3nzZgC++OIL8vPz8fLyonv37uWOOlAUhZiYGN577z1CQ0PZs2cP6enpjBkzRldQxcXFsWXLFjQaDePGjeOZZ57h9OnTXL16lT179pCfn4+npyePP/44pqamXL58mZ9++glzc3N27tzJ8ePHmT17Nl999RV2dnas
XbuW/Px8RowYQXx8PAC//vor33zzDc2bN2fixIn88ssvNGvWjE2bNrFjxw5MTU2ZMWMGcXFxrF+/vsxclhwQKkR9JEWREOKR8qDbZ4cPH+bSpUusXbsWgMLCQpKSkvDy8uLo0aOsX7+ev/76i4KCAnJycip8vKZNm9K6dWtAOx/rjz/+4OjRo4B2ZMKff/5ZblHk6OgIgI2NDT179sTU1JRWrVpx+/Zt3TojR47EzMwMgOeee46jR4/y22+/MXbsWAwMDDA1NcXFxYUjR47w3HPP6YYc32vkyJGcOXOGDRs2cOnSJTIzM7l79y4AnTt3xtraGoCOHTty69YtEhISGDx4sG5fGzZsKDeXdnZ2Fc6bEI8iKYqEEPWCRqPh008/xcLCAtBeUWratClvvfUWSUlJjBw5EicnJw4fPsy9041UKlWpZQUFBbqfTUxMdD8XFRWxcOFCXnjhBQDS09N1xcyDGBkZ6X42NCz7P7klZ0dpNBoMDAzQaDSl1lEUhaKiovtiKik6OprvvvuOiRMn8uyzz+pm+gEYGxvfd76GhoaoVCrd8mvXrmFqavrAXApR38nbZ0KIeqFfv35s2rQJ0D587OLiQk5ODocOHcLLy4vhw4eTkJDAtWvXdIVHYWEhAJaWliQlJZGXl0dmZiaxsbEPPMa2bdsoKCggOzsbd3d3Tp8+/dCx//jjj+Tn53Pr1i1++uknBgwYQL9+/di1axdFRUXk5OTw5Zdf4uDgcN+2Jc/j0KFDTJo0iVGjRpGXl8e5c+fuK65K6tOnDwcOHCA7O5vCwkLeeOMN4uLiHphLIeo7uVIkhKgXAgICCAoKwsXFBYBVq1bRqFEjXnnlFRYtWoSJiQnW1tZ0796d5ORk7OzsuHPnDgsXLiQ8PJxBgwYxYsQIWrVqRe/evcs8xuTJk0lMTGTs2LEUFhYybty4MguVyjI2Nsbd3Z2srCxeeeUVOnXqRNu2bbl8+TKjR4+moKAAFxcXhgwZct/zTz169OC9995j9erVvPjiiwQHB/PRRx/RqFEjnn76aZKTk2nTpk2Zx+3WrRseHh5MnjwZjUbDkCFDePbZZ+nYsWOZuRSivlMp915HFkIIUWMiIyMB8Pb2ruVIhBBy+0wIIYQQArlSJIQQQggByJUiIYQQQghAiiIhhBBCCECKIiGEEEIIQIoiIYQQQghAiiIhhBBCCAD+H8RTafObnI6pAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 576x396 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "lgb_feat_selection(data,label,k=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_item=['regDate','regionCode','model',\"name\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_item=['power','kilometer','v_2','v_0','v_9','v_5']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Time series"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "相对时间和绝对时间，节假日，双休日等,赛题中creatDate和regDate是时间序列特征；regDate - 汽车注册时间；creatDate - 卖车广告发布时间，因此可利用二者时间差推断汽车使用时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "#刊登卖车广告时间晚于注册时间，时间差可利用建立使用时间特征\n",
    "data[\"used_time\"]=(pd.to_datetime(data.creatDate,format='%Y%m%d', errors='coerce')-pd.to_datetime(data.regDate,format='%Y%m%d', errors='coerce')).dt.days #强制进行转换，不满足转换要求，则设为缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>nullrate</th>\n",
       "      <th>nullrate%</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>price</th>\n",
       "      <td>50000</td>\n",
       "      <td>0.276034</td>\n",
       "      <td>27.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>used_time</th>\n",
       "      <td>11951</td>\n",
       "      <td>0.065978</td>\n",
       "      <td>6.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               0  nullrate  nullrate%\n",
       "price      50000  0.276034       27.6\n",
       "used_time  11951  0.065978        6.6"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my.Nullrate(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>creatDate</th>\n",
       "      <th>regDate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>20160311</td>\n",
       "      <td>19970004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>20160326</td>\n",
       "      <td>19970008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>20160328</td>\n",
       "      <td>19960009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>20160325</td>\n",
       "      <td>20020006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>20160325</td>\n",
       "      <td>19990007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49916</th>\n",
       "      <td>20160324</td>\n",
       "      <td>20150008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49925</th>\n",
       "      <td>20160316</td>\n",
       "      <td>20020003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49935</th>\n",
       "      <td>20160307</td>\n",
       "      <td>19960007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49938</th>\n",
       "      <td>20160319</td>\n",
       "      <td>20080004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49939</th>\n",
       "      <td>20160316</td>\n",
       "      <td>19950006</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       creatDate   regDate\n",
       "20      20160311  19970004\n",
       "22      20160326  19970008\n",
       "42      20160328  19960009\n",
       "51      20160325  20020006\n",
       "69      20160325  19990007\n",
       "49916   20160324  20150008\n",
       "49925   20160316  20020003\n",
       "49935   20160307  19960007\n",
       "49938   20160319  20080004\n",
       "49939   20160316  19950006"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data.used_time.isnull()][[\"creatDate\",\"regDate\"]].head().append(data[data.used_time.isnull()][[\"creatDate\",\"regDate\"]].tail())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "used_time的缺失值是由于regDate数据异常引起，月份不能是0；因此提取两个时间的年份信息，再转化为天数作为填充。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[(data.used_time.isnull()),\"used_time\"] = (data.loc[(data.used_time.isnull()),\"creatDate\"].apply(lambda x:eval(str(x)[0:4]))-\n",
    "                                                data.loc[(data.used_time.isnull()),\"regDate\"].apply(lambda x:eval(str(x)[0:4])))*365"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "广告发布时间以及注册时间如果是节假日可能也会影响价格，因此创建一个节假日特征。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# \"\"\"\n",
    "# from chinese_calendar import is_workday, is_holiday,is_in_lieu  #判断日期是否工作日；节假日；调休\n",
    "# #由于时间序列数据是德国的日历；因此不能用中国日历来创建特征。\n",
    "# creat_time=pd.to_datetime(data.creatDate,format='%Y%m%d', errors='coerce')\n",
    "# data[\"is_workday_creat\"]=creat_time.apply(lambda x:is_workday(x) )\n",
    "# data[\"is_holiday_creat\"]=creat_time.apply(lambda x:is_holiday(x) )\n",
    "# data[\"is_in_lieu_creat\"]=creat_time.apply(lambda x:is_in_lieu(x) )\n",
    "#由于chinese_calendar只支持2003-2020年日历查询，而regDate大多数是2004年之前的数据，因此不创建节假日特征。\n",
    "# \"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[\"reg_time\"]=data.regDate.apply(lambda x:eval(str(x)[0:6])) #抽取年份+月份信息;用于后续统计信息创建"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "creat_time=pd.to_datetime(data.creatDate,format='%Y%m%d', errors='coerce')\n",
    "data[\"year_creat\"]=creat_time.dt.year\n",
    "data[\"month_creat\"]=creat_time.dt.month\n",
    "data[\"week_creat\"]=creat_time.dt.weekday"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[\"year_reg\"]=creat_time.dt.year\n",
    "data[\"month_reg\"]=creat_time.dt.month\n",
    "data[\"week_reg\"]=creat_time.dt.weekday"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "def used_time_cut(x): #按照使用年限进行切割\n",
    "    if x<=365:\n",
    "        return 0\n",
    "    elif x<=1095:\n",
    "        return 1\n",
    "    elif x<=2555:\n",
    "        return 2\n",
    "    else:\n",
    "        return 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[\"used_time_cut\"]=data.used_time.apply(lambda x:used_time_cut(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_feat=cat_feat+[\"reg_time\",\"used_time_cut\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'MAE': 558.27}"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kvalid(data[data.SaleID<200000].drop(\"price\",axis=1),bc_y_train,k=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* location"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "经测试，location信息可能会造成信息泄露，故不采用该特征"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.2.1 Feature combination"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Based on experience"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[\"name_len\"]=data[\"name\"].apply(lambda x : len(str(x)))\n",
    "data[\"kilo_perday\"]=data[\"kilometer\"]/data[\"used_time\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "data1=data.copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Based on gplearn (with multiple non-linear functions)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- gplearn 有目的地约束了符号回归问题。\n",
    "- 手工特征工程中，我们会对两个或者多个特征进行一些加减乘除的操作，来生成一些特征，希望能够生成一些根据领域的先验知识。\n",
    "- 对金额特征, 日期特征进行比值操作生成一些特征，这些特征经常能够提升验证集和测试集的分数，在模型中也有很高的重要程度。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Gp_feat_gen(train_x,bc_y_train):\n",
    "    \"\"\"Generate symbolic-regression features with gplearn.\n",
    "\n",
    "    Fits a SymbolicTransformer on (train_x, bc_y_train), applies it back to\n",
    "    train_x and returns the transformed values wrapped in a pandas DataFrame.\n",
    "    \"\"\"\n",
    "    # Imported lazily so gplearn is only required when this function is called.\n",
    "    from gplearn.genetic import SymbolicTransformer\n",
    "    gp_settings = dict(\n",
    "        generations=30,\n",
    "        population_size=1800,\n",
    "        hall_of_fame=100,\n",
    "        n_components=10,\n",
    "        function_set=['add', 'sub', 'mul', 'div',\n",
    "                      'sqrt', 'log', 'abs', 'neg', 'inv',\n",
    "                      'max', 'min'],\n",
    "        parsimony_coefficient=0.0005,\n",
    "        max_samples=0.9,\n",
    "        verbose=0,\n",
    "        random_state=42,\n",
    "        n_jobs=-1,\n",
    "    )\n",
    "    transformer = SymbolicTransformer(**gp_settings)\n",
    "    transformer.fit(train_x,bc_y_train)\n",
    "    generated = transformer.transform(train_x)\n",
    "    return pd.DataFrame(generated)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# gp_feat=Gp_feat_gen(data[data.SaleID<200000].drop(label,axis=1),bc_y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "#效果不好"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Based on statistics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "重要特征的计数、均值、标准差等统计值；\n",
    "匿名特征的计数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "def count_feat(df,cols):\n",
    "    \"\"\"Append a value-frequency column for each column in cols.\n",
    "\n",
    "    For every col, a column named col+'__count' is added that holds how many\n",
    "    non-null rows of df share the row's value of col. Each frequency table is\n",
    "    attached with a left merge; the input frame is not modified in place.\n",
    "    \"\"\"\n",
    "    for col in cols:\n",
    "        freq_name = col+'__count'\n",
    "        freq = df.groupby([col])[col].count().to_frame(name=freq_name)\n",
    "        df = df.merge(freq, how='left', on=[col])\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['SaleID', 'name', 'regDate', 'model', 'brand', 'bodyType', 'fuelType',\n",
       "       'gearbox', 'power', 'kilometer', 'notRepairedDamage', 'regionCode',\n",
       "       'creatDate', 'price', 'v_0', 'v_1', 'v_2', 'v_3', 'v_4', 'v_5', 'v_6',\n",
       "       'v_7', 'v_8', 'v_9', 'v_10', 'v_11', 'v_12', 'v_13', 'v_14',\n",
       "       'used_time', 'reg_time', 'year_creat', 'month_creat', 'week_creat',\n",
       "       'year_reg', 'month_reg', 'week_reg', 'used_time_cut', 'name_len',\n",
       "       'kilo_perday'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=count_feat(data,cols=cat_item+num_item)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "def stat_feat_label(df,onecatcols):\n",
    "    \"\"\"Attach per-category statistics of the label column to every row.\n",
    "\n",
    "    For each column in onecatcols, the label column (global name `label`) is\n",
    "    aggregated over the training rows only (SaleID below 200000) with\n",
    "    max/min/mean/sum/median/std/skew. The result is left-merged onto df as\n",
    "    columns named col_label_func. Returns the merged DataFrame.\n",
    "\n",
    "    NOTE(review): the statistics include each training row's own label, so these\n",
    "    features leak the target into the training rows; consider computing them\n",
    "    out-of-fold if validation scores look optimistic.\n",
    "    \"\"\"\n",
    "    funcs=[\"max\",\"min\",\"mean\",\"sum\",\"median\",\"std\",\"skew\"]\n",
    "    train_rows=df[df.SaleID<200000]  # statistics come from the training split only\n",
    "    for col in onecatcols:\n",
    "        label_stats=train_rows.groupby(col)[label].agg(funcs)\n",
    "        # Name the columns before merging: renaming afterwards by bare function name\n",
    "        # breaks when df already contains a column called e.g. \"max\" or \"mean\".\n",
    "        label_stats.columns=[col+\"_\"+label+\"_\"+func for func in funcs]\n",
    "        df=df.merge(label_stats,how=\"left\",on=col)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of dataframe is 153966450.00 MB\n",
      "Memory usage after optimization is: 53616552.00 MB\n",
      "Decreased by 65.2%\n"
     ]
    }
   ],
   "source": [
    "data=my.reduce_mem_usage(stat_feat_label(data,cat_feat))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "def stat_feat(df,onecatcols,onenumcols):\n",
    "    \"\"\"Attach per-category statistics of numeric features to every row.\n",
    "\n",
    "    For each pair (col in onecatcols, j in onenumcols), column j is aggregated\n",
    "    within the groups of col over all rows of df with\n",
    "    max/min/mean/sum/median/std/skew, and the result is left-merged onto df as\n",
    "    columns named col_j_func. Returns the merged DataFrame.\n",
    "    \"\"\"\n",
    "    funcs=[\"max\",\"min\",\"mean\",\"sum\",\"median\",\"std\",\"skew\"]  # loop-invariant\n",
    "    for col in onecatcols:\n",
    "        for j in onenumcols:\n",
    "            group_stats=df.groupby(col)[j].agg(funcs)\n",
    "            # Name the columns before merging: renaming afterwards by bare function name\n",
    "            # breaks when df already contains a column called e.g. \"max\" or \"mean\".\n",
    "            group_stats.columns=[col+\"_\"+j+\"_\"+func for func in funcs]\n",
    "            df=df.merge(group_stats,how=\"left\",on=col)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of dataframe is 156864642.00 MB\n",
      "Memory usage after optimization is: 117376776.00 MB\n",
      "Decreased by 25.2%\n"
     ]
    }
   ],
   "source": [
    "df_stat=my.reduce_mem_usage(stat_feat(data,cat_item,num_item))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "加入的统计型特征变量有利于提高精度"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.2.2 Numeric feature"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Data binning"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "等频分箱；等距分箱；Best-KS 分箱（类似利用基尼指数进行二分类）；卡方分箱；聚类分箱。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 分箱算法的好处等同于数据离散化的好处，大致有下面几点：\n",
    "* 1.可以减少过拟合的风险，因为分箱相当于对于数据去粗粒度描述。\n",
    "* 2.增加稀疏数据的概率，减少计算量，因为0的数据变多了。\n",
    "* 3.减少噪声数据的影响，比如一组数据按照0~100均匀分布，当数据中突然出现一个10000的数据，如果不做分箱的话会对Logistic Regression这种模型的训练造成很大影响。\n",
    "* 4.方便特征衍生，因为数据离散化后就可以把特征直接相互做内积提升特征维度。\n",
    "* 5.离散化后可以提升模型的鲁棒性，比如我们有一组数据是年龄，比如A30岁、B50岁，到了第二年A变成31岁，B变成51岁，所有数据都变了理论上要更新模型。但是如果数据分箱了之后，比如分箱逻辑是小于40岁为0，大于40岁为1，则第一年和第二年数据没有变化，模型也不用变化。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# #等距分箱\n",
    "# le = LabelEncoder()\n",
    "# for col in continuous_feature:\n",
    "#     data[col+\"_cut\"]=le.fit_transform(pd.cut(data[col],bins=4).astype(\"category\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# 聚类划分\n",
    "from sklearn.cluster import KMeans\n",
    "def KMeans_cut(df,col,k=5,random_state=42):\n",
    "    \"\"\"Discretize one numeric column into k clusters with KMeans.\n",
    "\n",
    "    Adds, in place, a column col+'_cut' holding the cluster label of each row\n",
    "    and returns the same DataFrame. The labels are cluster ids, not ordered bin\n",
    "    indices, and the clusters are not guaranteed to be equal-sized.\n",
    "\n",
    "    random_state seeds the centroid initialisation so the assignment is\n",
    "    reproducible between runs; pass None to get the unseeded behaviour.\n",
    "    \"\"\"\n",
    "    kmeans = KMeans(n_clusters=k,random_state=random_state)\n",
    "    # Cluster on this single feature, reshaped to the (n_samples, 1) matrix sklearn expects.\n",
    "    # fit() is sufficient: the distance matrix returned by fit_transform() was never used.\n",
    "    kmeans.fit(df[col].values.reshape(-1, 1))\n",
    "    df[col+\"_cut\"]=kmeans.labels_\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in num_item:\n",
    "    data=KMeans_cut(data,i,k=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of dataframe is 63206720.00 MB\n",
      "Memory usage after optimization is: 59946254.00 MB\n",
      "Decreased by 5.2%\n"
     ]
    }
   ],
   "source": [
    "my.reduce_mem_usage(data).to_csv(\"data.csv\",index=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.2.3 Categorical feature"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(used when training XGBoost or non-tree models)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "import category_encoders as ce"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 无序离散特征:OneHot, Hashing, LeaveOneOut,target encoding 方法效果较好，但是使用OneHot时要避免高基类别的特征以及基于决策树的模型。\n",
    "* 有序离散特征:尝试 Ordinal, Binary, OneHot, LeaveOneOut, and Target. \n",
    "* 高基数特征: LeaveOneOut、 WeightOfEvidence、 James-Stein、M-estimator 适合用来处理高基数特征。\n",
    "* 回归问题:Target 与 LeaveOneOut 方法可能不会有比较好的效果。\n",
    "* Helmert、 Sum、 Backward Difference、 Polynomial 在机器学习问题里的效果往往不是很好(过拟合的原因)\n",
    "* Helmert, Sum, BackwardDifference and Polynomial 基本没啥用，但是当你有确切的原因或者对于业务的理解的话，可以进行尝试。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* label encoding\n",
    "特征存在内在顺序 (ordinal feature)\n",
    "* one hot encoding\n",
    "特征无内在顺序，category数量 < 4\n",
    "* target encoding (mean encoding, likelihood encoding, impact encoding)\n",
    "特征无内在顺序，category数量 > 4\n",
    "* beta target encoding\n",
    "特征无内在顺序，category数量 > 4, K-fold cross validation\n",
    "* 不做处理（模型自动编码）\n",
    "CatBoost，lightgbm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_xgb=data.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>nunique</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>name</th>\n",
       "      <td>116164</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>regionCode</th>\n",
       "      <td>7975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>reg_time</th>\n",
       "      <td>325</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>model</th>\n",
       "      <td>248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>brand</th>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bodyType</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fuelType</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>used_time_cut</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gearbox</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>notRepairedDamage</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   nunique\n",
       "name                116164\n",
       "regionCode            7975\n",
       "reg_time               325\n",
       "model                  248\n",
       "brand                   40\n",
       "bodyType                 8\n",
       "fuelType                 7\n",
       "used_time_cut            4\n",
       "gearbox                  2\n",
       "notRepairedDamage        2"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_cate_nuiq=data[cat_feat].nunique().to_frame()\n",
    "data_cate_nuiq.columns=[\"nunique\"]\n",
    "data_cate_nuiq.sort_values(by=\"nunique\",ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "high_cat=data_cate_nuiq[data_cate_nuiq[\"nunique\"]>=10].index.tolist() #CatBoost/LeaveOneOut/rank/count\n",
    "low_cat=data_cate_nuiq[data_cate_nuiq[\"nunique\"]<10].index.tolist() # CatBoost/onehot/target encoding/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['name', 'model', 'brand', 'regionCode', 'reg_time']"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "high_cat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['bodyType', 'fuelType', 'gearbox', 'notRepairedDamage', 'used_time_cut']"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "low_cat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* low_cate with onehot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "ohe=ce.one_hot.OneHotEncoder(verbose=0, cols=low_cat, drop_invariant=True, return_df=True, handle_missing='value', handle_unknown='value', use_cat_names=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_ohe=ohe.fit_transform(data[low_cat+[\"SaleID\"]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_xgb=pd.merge(data_xgb,df_ohe,how=\"left\",on=\"SaleID\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_xgb.drop(low_cat,axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['SaleID', 'name', 'regDate', 'model', 'brand', 'power', 'kilometer',\n",
       "       'regionCode', 'creatDate', 'price',\n",
       "       ...\n",
       "       'fuelType_5.0', 'fuelType_6.0', 'gearbox_0.0', 'gearbox_1.0',\n",
       "       'notRepairedDamage_0.0', 'notRepairedDamage_1.0', 'used_time_cut_3.0',\n",
       "       'used_time_cut_2.0', 'used_time_cut_1.0', 'used_time_cut_0.0'],\n",
       "      dtype='object', length=144)"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_xgb.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* high_cate with target encoding/m_estimate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "#target_encoder\n",
    "tge=ce.target_encoder.TargetEncoder(verbose=0, cols=high_cat, drop_invariant=True, \n",
    "                                               return_df=True, handle_missing='value', handle_unknown='value',\n",
    "                                               min_samples_leaf=1, smoothing=1.0)\n",
    "X_train=data[data.SaleID<200000].drop(\"price\",axis=1)\n",
    "y_train=data.loc[data.SaleID<200000,\"price\"]\n",
    "df_tge=tge.fit(X_train,y_train).transform(data.drop(\"price\",axis=1))\n",
    "df_tge_onlyhighcate=df_tge[high_cat+[\"SaleID\"]]\n",
    "df_tge_onlyhighcate.columns=[str(i)+\"_tge\" for i in high_cat+[\"SaleID\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(50000, 6)"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tge_onlyhighcate[df_tge_onlyhighcate.SaleID_tge>=200000].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_xgb=pd.merge(data_xgb,df_tge_onlyhighcate,how=\"left\",left_on=\"SaleID\",right_on=\"SaleID_tge\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_xgb.drop(high_cat+[\"SaleID_tge\"],axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of dataframe is 95278062.00 MB\n",
      "Memory usage after optimization is: 57782703.00 MB\n",
      "Decreased by 39.4%\n"
     ]
    }
   ],
   "source": [
    "my.reduce_mem_usage(data_xgb).to_csv(\"data_xgb.csv\",index=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.3 Feature Selection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 过滤式（filter）：先对数据进行特征选择，然后再训练学习器，常见的方法有 Relief/方差选择法/相关系数法/卡方检验法/互信息法；\n",
    "* 包裹式（wrapper）：直接把最终将要使用的学习器的性能作为特征子集的评价准则，常见方法有 LVW（Las Vegas Wrapper）；\n",
    "* 嵌入式（embedded）：结合过滤式和包裹式，学习器训练过程中自动进行了特征选择，常见的有 lasso 回归；\n",
    "* 降维:PCA/ LDA/ ICA."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "feature_selector可以滤除以下特征：\n",
    "* 具有高missing-values百分比的特征\n",
    "* 具有单个值的特征（即数据集中该特征取值的集合只有一个元素）(方差为0)\n",
    "* 具有高相关性的特征\n",
    "* 对模型预测结果无贡献的特征（即zero importance）\n",
    "* 对模型预测结果只有很小贡献的特征（即low importance）"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3.1 pytools: feature_selector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "from feature_selector import FeatureSelector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs = FeatureSelector(data =data[data.SaleID<200000].drop(\"price\",axis=1), labels = data.loc[data.SaleID<200000,\"price\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs_xgb=FeatureSelector(data_xgb[data_xgb.SaleID<200000].drop(\"price\",axis=1), labels = data_xgb.loc[data_xgb.SaleID<200000,\"price\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 综合使用全部方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 features with greater than 0.50 missing values.\n",
      "\n",
      "1 features with a single unique value.\n",
      "\n",
      "31 features with a correlation magnitude greater than 0.98.\n",
      "\n",
      "Training Gradient Boosting Model\n",
      "\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 363.553\tvalid_0's l2: 760779\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Early stopping, best iteration is:\n",
      "[816]\tvalid_0's l1: 357.368\tvalid_0's l2: 738687\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 365.094\tvalid_0's l2: 736951\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 356.759\tvalid_0's l2: 744932\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 363.429\tvalid_0's l2: 736341\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[999]\tvalid_0's l1: 360.364\tvalid_0's l2: 728313\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[998]\tvalid_0's l1: 364.343\tvalid_0's l2: 724784\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 357.826\tvalid_0's l2: 694737\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 365.64\tvalid_0's l2: 790897\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 365.233\tvalid_0's l2: 800773\n",
      "\n",
      "24 features with zero importance after one-hot encoding.\n",
      "\n",
      "78 features required for cumulative importance of 0.99 after one hot encoding.\n",
      "47 features do not contribute to cumulative importance of 0.99.\n",
      "\n",
      "56 total features out of 125 identified for removal after one-hot encoding.\n",
      "\n",
      "['missing', 'single_unique', 'collinear', 'zero_importance', 'low_importance'] methods have been run\n",
      "\n",
      "Removed 56 features.\n",
      "['missing', 'single_unique', 'collinear', 'zero_importance', 'low_importance'] methods have been run\n",
      "\n",
      "Removed 56 features including one-hot features.\n"
     ]
    }
   ],
   "source": [
    "fs.identify_all(selection_params = {'missing_threshold': 0.5, \n",
    " 'correlation_threshold': 0.98, \n",
    " 'task': 'regression', \n",
    " 'eval_metric': 'mae', \n",
    " 'cumulative_importance': 0.99})\n",
    "train_removed = fs.remove(methods = 'all')\n",
    "train_removed_all = fs.remove(methods = 'all', keep_one_hot=False) #返回的df中不包含onehot特征\n",
    "feat_saved=train_removed.columns.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 features with greater than 0.50 missing values.\n",
      "\n",
      "1 features with a single unique value.\n",
      "\n",
      "39 features with a correlation magnitude greater than 0.98.\n",
      "\n",
      "Training Gradient Boosting Model\n",
      "\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 363.552\tvalid_0's l2: 719002\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 369.282\tvalid_0's l2: 878491\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 372.227\tvalid_0's l2: 788135\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[998]\tvalid_0's l1: 356.579\tvalid_0's l2: 663678\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 363.343\tvalid_0's l2: 712594\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 356.282\tvalid_0's l2: 756099\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 364.592\tvalid_0's l2: 737274\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[999]\tvalid_0's l1: 355.124\tvalid_0's l2: 702416\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Early stopping, best iteration is:\n",
      "[828]\tvalid_0's l1: 367.297\tvalid_0's l2: 764084\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[1000]\tvalid_0's l1: 356.312\tvalid_0's l2: 690123\n",
      "\n",
      "38 features with zero importance after one-hot encoding.\n",
      "\n",
      "77 features required for cumulative importance of 0.99 after one hot encoding.\n",
      "66 features do not contribute to cumulative importance of 0.99.\n",
      "\n",
      "73 total features out of 143 identified for removal after one-hot encoding.\n",
      "\n",
      "['missing', 'single_unique', 'collinear', 'zero_importance', 'low_importance'] methods have been run\n",
      "\n",
      "Removed 73 features.\n",
      "['missing', 'single_unique', 'collinear', 'zero_importance', 'low_importance'] methods have been run\n",
      "\n",
      "Removed 73 features including one-hot features.\n"
     ]
    }
   ],
   "source": [
    "fs_xgb.identify_all(selection_params = {'missing_threshold': 0.5, \n",
    " 'correlation_threshold': 0.98, \n",
    " 'task': 'regression', \n",
    " 'eval_metric': 'mae', \n",
    " 'cumulative_importance': 0.99})\n",
    "train_removed_xgb = fs_xgb.remove(methods = 'all')\n",
    "train_removed_all_xgb = fs_xgb.remove(methods = 'all', keep_one_hot=False) #返回的df中不包含onehot特征\n",
    "feat_saved_xgb=train_removed_xgb.columns.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "feat_saved_xgb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_sel=data[[label]+feat_saved]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_xgb_sel=data_xgb[[label]+feat_saved_xgb]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of dataframe is 37304129.00 MB\n",
      "Memory usage after optimization is: 36760718.00 MB\n",
      "Decreased by 1.5%\n"
     ]
    }
   ],
   "source": [
    "my.reduce_mem_usage(data_sel).to_csv(\"data_sel.csv\",index=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory usage of dataframe is 37133085.00 MB\n",
      "Memory usage after optimization is: 32423523.00 MB\n",
      "Decreased by 12.7%\n"
     ]
    }
   ],
   "source": [
    "my.reduce_mem_usage(data_xgb_sel).to_csv(\"data_xgb_sel.csv\",index=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 高missing-values百分比的特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs.identify_missing(missing_threshold = 0.5) #查看缺失值\n",
    "fs.missing_stats.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_features = fs.ops['missing']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs.plot_missing()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 单个唯一值特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs.identify_single_unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "single_unique_features=fs.ops['single_unique']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "single_unique_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs.plot_unique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 共线性特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs.identify_collinear(correlation_threshold = 0.99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs.plot_collinear()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# list of collinear features to remove\n",
    "collinear_features = fs.ops['collinear']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "collinear_features[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dataframe of collinear features\n",
    "fs.record_collinear.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 零重要度特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pass in the appropriate parameters\n",
    "fs.identify_zero_importance(task = 'regression', \n",
    " eval_metric = 'mae', \n",
    " n_iterations = 10, \n",
    " early_stopping = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# list of zero importance features\n",
    "zero_importance_features = fs.ops['zero_importance']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(zero_importance_features )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the first and last 10 rows of the feature-importance table together.\n",
    "# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.\n",
    "pd.concat([fs.feature_importances.head(10),fs.feature_importances.tail(10)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot the feature importances\n",
    "fs.plot_feature_importances(threshold = 0.995, plot_n = 12)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "one_hot_features = fs.one_hot_features\n",
    "base_features = fs.base_features\n",
    "print('There are %d original features' % len(base_features))\n",
    "print('There are %d one-hot features' % len(one_hot_features))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 低重要度特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs.identify_low_importance(cumulative_importance = 0.995)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "low_importance_features = fs.ops['low_importance']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(low_importance_features)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 需要移除的特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "drop_feature=list(set(low_importance_features+zero_importance_features+collinear_features+single_unique_features+missing_features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(drop_feature)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_sel=data_all.drop(drop_feature,axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lgb_cv_valid(data_sel)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_sel.to_csv(\"data_sel.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 移除指定特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_no_missing_zero = fs.remove(methods = ['missing', 'zero_importance'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the column names of the reduced frame as the list of retained features.\n",
    "# NOTE(review): train_removed is not assigned in the neighbouring cells (the preceding\n",
    "# cell stores the result of fs.remove in train_no_missing_zero) -- confirm which frame is intended.\n",
    "feature_saved=train_removed.columns.tolist() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[feature_saved].info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check the MAE after feature selection\n",
    "lgb_cv_valid(pd.concat([data[feature_saved],data[\"price\"]],axis=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3.2 Lasso"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Lasso_feat_drop(train_x,bc_y_train,alpha=0.05,test_size=0.66,random_state=42):\n",
    "    \"\"\"Return the columns of train_x whose Lasso coefficient is exactly zero.\n",
    "\n",
    "    Lasso is an L1-regularised linear regression; the regularisation yields a\n",
    "    sparse weight vector, so zero-weight columns are candidates for removal.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    train_x : pandas.DataFrame\n",
    "        Feature matrix; its column names label the fitted coefficients.\n",
    "    bc_y_train : array-like\n",
    "        Target values aligned with the rows of train_x.\n",
    "    alpha : float, default 0.05\n",
    "        Regularisation strength passed to Lasso.\n",
    "    test_size : float, default 0.66\n",
    "        Fraction of rows held out by train_test_split and not used for fitting.\n",
    "        NOTE(review): the default fits on only about a third of the rows --\n",
    "        confirm that 0.66 (rather than 0.34) is intended.\n",
    "    random_state : int, default 42\n",
    "        Seed for the row split.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        Column names of train_x whose coefficient equals 0.\n",
    "    \"\"\"\n",
    "    from sklearn.linear_model import Lasso\n",
    "\n",
    "    # Only the fitting part of the split is used; the held-out part is discarded.\n",
    "    X_fit, _, y_fit, _ = train_test_split(train_x, bc_y_train, test_size=test_size, random_state=random_state)\n",
    "    # A separate name for the estimator keeps the Lasso class from being shadowed.\n",
    "    lasso_model = Lasso(alpha=alpha)\n",
    "    lasso_model.fit(X_fit, y_fit)\n",
    "    importance = dict(zip(train_x.columns, lasso_model.coef_))\n",
    "    return [name for name, coef in importance.items() if coef == 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill the missing values of every feature column with that column's mode;\n",
    "# the target column price is left untouched.\n",
    "data_lasso=data_all.copy()\n",
    "ls=data_all.columns.tolist()\n",
    "ls.remove(\"price\")\n",
    "for col in ls:\n",
    "    col_mode=data_lasso[col].mode()\n",
    "    # mode() is empty for an all-NaN column; such a column is left unchanged.\n",
    "    if not col_mode.empty:\n",
    "        # Assign the result back: fillna(inplace=True) on a column selection may\n",
    "        # act on a temporary copy and leave data_lasso unmodified.\n",
    "        data_lasso[col]=data_lasso[col].fillna(col_mode[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_lasso.loc[data_lasso.SaleID<200000,\"price\"].isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit a Lasso model on the rows with SaleID < 200000, using every column except price as features.\n",
    "from sklearn.linear_model import Lasso # L1-norm regularisation\n",
    "# NOTE(review): the next line rebinds the name Lasso from the class to an estimator instance.\n",
    "Lasso= Lasso(alpha=0.3)\n",
    "Lasso.fit(data_lasso[data_lasso.SaleID<200000].drop(\"price\",axis=1),data_lasso.loc[data_lasso.SaleID<200000,\"price\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): df and feat_lasso_drop are not assigned in the surrounding cells --\n",
    "# confirm they are defined before this cell runs.\n",
    "lgb_cv_valid(df.drop(feat_lasso_drop,axis=1)) # the error increases, so these features cannot be filtered out"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.1 Model-LGBM"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Params tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the prepared table; rows with SaleID < 200000 form the training set.\n",
    "data=pd.read_csv(\"data.csv\")\n",
    "X_train=data[data.SaleID<200000].drop(\"price\",axis=1)\n",
    "bc_y_train, maxlog= stats.boxcox(data[data.SaleID<200000][\"price\"]) # bc_y_train is the Box-Cox transformed price; maxlog is the fitted transform parameter (lambda)\n",
    "y_train=bc_y_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████| 5/5 [16:20<00:00, 196.15s/trial, best loss: 385.36]\n"
     ]
    }
   ],
   "source": [
    "# Tune the LightGBM hyper-parameters with Hyperopt\n",
    "from hyperopt import fmin, tpe, hp,Trials,STATUS_OK\n",
    "def f_lgb(params):\n",
    "    \"\"\"Hyperopt objective: 5-fold cross-validated MAE of a LightGBM model built from params.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    params : dict\n",
    "        Keyword arguments for lgb.LGBMRegressor, sampled from the search space.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        'loss' is the mean fold MAE rounded to 2 decimals, 'status' is STATUS_OK.\n",
    "    \"\"\"\n",
    "    # shuffle=True is required for random_state to take effect (and to be accepted by recent scikit-learn)\n",
    "    kf = KFold(n_splits=5, shuffle=True, random_state=42)\n",
    "    MAE=[]\n",
    "    train_x=X_train\n",
    "    # Split into the training and validation part of each fold\n",
    "    for train, test in kf.split(train_x):\n",
    "        kf_X_tr = train_x.iloc[train]  # fold training features\n",
    "        kf_y_tr = bc_y_train[train]    # fold training targets\n",
    "        kf_X_val =train_x.iloc[test]   # fold validation features\n",
    "        kf_y_val = bc_y_train[test]    # fold validation targets\n",
    "        # Build a fresh model from the sampled hyper-parameters for every fold,\n",
    "        # so that the returned loss actually depends on params.\n",
    "        clf = lgb.LGBMRegressor(**params)\n",
    "        clf.fit(kf_X_tr,kf_y_tr,categorical_feature=cat_feat,verbose=0) \n",
    "        result=clf.predict(kf_X_val)\n",
    "        # bcback presumably inverts the Box-Cox transform so the MAE is on the price scale -- TODO confirm\n",
    "        MAE.append(mean_absolute_error(bcback(kf_y_val,maxlog),bcback(result,maxlog)))\n",
    "    return {'loss': round(np.mean(MAE),2), 'status': STATUS_OK}\n",
    "\n",
    "# Other available distributions: hp.normal, hp.lognormal, hp.quniform\n",
    "# Note: for hp.choice entries fmin returns the index of the chosen option, not the\n",
    "# option itself; hyperopt.space_eval(space_lgb, best1) yields the actual values.\n",
    "space_lgb = {\n",
    "        \"objective\":'regression_l1',\n",
    "        \"metric\":\"mae\",\n",
    "        \"n_estimators\":100,\n",
    "        'num_leaves': hp.choice('num_leaves', list(range(20,130))),\n",
    "        \"learning_rate\":hp.uniform(\"learning_rate\",0.01,0.1),\n",
    "        \"reg_alpha\":hp.uniform(\"reg_alpha\",0,0.05),\n",
    "        \"reg_lambda\":hp.uniform(\"reg_lambda\",0,0.05)}\n",
    "trials = Trials()\n",
    "best1 = fmin(f_lgb, space_lgb, algo=tpe.suggest, max_evals=5, trials=trials)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'learning_rate': 0.07922079045340327,\n",
       " 'num_leaves': 26,\n",
       " 'reg_alpha': 0.012492997412838236,\n",
       " 'reg_lambda': 0.025742522365766837}"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best1 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scatter of the cross-validated MAE against the sampled num_leaves of every Hyperopt trial\n",
    "from hyperopt import space_eval\n",
    "fig, ax = plt.subplots(1)\n",
    "# Each trial stores its sampled values as one-element lists, and hp.choice entries as\n",
    "# option indices; space_eval maps them back to the real num_leaves value.\n",
    "xs = [space_eval(space_lgb, {k: v[0] for k, v in t['misc']['vals'].items()})['num_leaves'] for t in trials.trials]\n",
    "# Plot the loss itself (no sign flip) so that the values match the MAE axis label.\n",
    "ys = [t['result']['loss'] for t in trials.trials]\n",
    "ax.scatter(xs, ys, s=20)\n",
    "ax.set_title('LightGBM Hyperopt trials', fontsize=18)\n",
    "ax.set_xlabel('num_leaves', fontsize=12)\n",
    "ax.set_ylabel('cross validation mae', fontsize=12);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n",
       "              importance_type='split', learning_rate=0.07922079045340327,\n",
       "              max_depth=-1, min_child_samples=20, min_child_weight=0.001,\n",
       "              min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=26,\n",
       "              objective=None, random_state=None, reg_alpha=0.012492997412838236,\n",
       "              reg_lambda=0.025742522365766837, silent=True, subsample=1.0,\n",
       "              subsample_for_bin=200000, subsample_freq=0)"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fmin returns hp.choice parameters as option indices and omits the constant entries\n",
    "# of the search space; space_eval rebuilds the actual parameter dict to train with.\n",
    "from hyperopt import space_eval\n",
    "best_params_lgb = space_eval(space_lgb, best1)\n",
    "clf = lgb.LGBMRegressor(**best_params_lgb)\n",
    "clf.fit(X_train, bc_y_train,categorical_feature=cat_feat,verbose=0) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_x=data[data.SaleID>=200000].drop(label,axis=1)\n",
    "predict=clf.predict(test_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x25f8be7f5c8>"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAD+CAYAAADPjflwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHfJJREFUeJzt3X10VPW97/H3PGTyMDPhGa0P4RB0coo2heC1xwpYDiK919Qqqz6UK7ddtC5EjUtqNMIRjTSGYIVWorjkFO29sZSJWtt7e9tjm1YJing0NXJjeVCqqKAQDMjMkGQys3/3j4QxQX5MooSQ8nmtxUr2/n1n8v1tMnzYe/be4zLGGERERI7CPdANiIjIyUshISIiVgoJERGxUkiIiIiVQkJERKwUEiIiYqWQEBERK4WEiIhYKSRERMRKISEiIlYKCRERsVJIiIiIlUJCRESsFBIiImKlkBARESuFhIiIWCkkRETESiEhIiJWCgkREbHyDnQDJ4tnX99FIun0WOf1uLlq4pkD1JGIyMBTSHRJJB06kuaItc5Ra0VEThU63CQiIlYKCRERsVJIiIiIlUJCRESsFBIiImKlkBARESuFhIiIWCkkRETESiEhIiJWCgkREbFSSIiIiJVCQkRErBQSIiJipZAQERErhYSIiFil/TwJx3EoLy9n27Zt+Hw+KioqGDNmTGq8traWdevW4fV6mT9/PtOmTaOlpYXS0lLa2toYPXo0S5cuJTs7+6i1Bw4cYObMmYRCIQAuvfRSvve97/XfjEVEpNfShkRdXR3xeJxwOExjYyNVVVU8+uijADQ3N1NTU8MzzzxDe3s7s2fP5uKLL2bVqlUUFxcza9YsVq9eTTgc5vLLLz9q7d/+9jeKi4tZvHhxv09WRET6Ju3hpoaGBqZMmQLAhAkTaGpqSo1t3ryZiRMn4vP5CAaD5OXlsXXr1h6PmTp1Khs3brTWNjU18eabb3L99ddz6623snfv3n6aqoiI9FXakIhGowQCgdSyx+MhkUikxoLBYGrM7/cTjUZ7rPf7/UQiEWttfn4+t956K08++SSXXnopFRUVaZuurq6moKCAgoICqqurez9bERHpk7SHmwKBALFYLLXsOA5er/eoY7FYjGAwmFqflZVFLBYjNzfXWltYWEh2djYAM2bMYOXKlWmbLikpoaSkpPezFBGRzyXtnkRRURH19fUANDY2pt5gBigsLKShoYH29nYikQg7duwgFApRVFTE+vXrAaivr2fSpEnW2rvvvpvnnnsOgJdffpnzzjuvP+YpIiKfg8sYY45VcPjspu3bt2OMobKykvr6evLy8pg+fTq1tbWEw2GMMcybN4+ZM2eyb98+ysrKiMViDBs2jOXLl5OTk3PU2vfff59FixYBkJ2dTUVFBaNHjz4hk+/uqdfepyPZc1NkeFxcfcHZJ7wXEZGTRdqQOFUoJEREPksX04mIiJVCQkRErBQSIiJipZAQERErhYSIiFgpJERExEohISIiVgoJERGxUkiIiIiVQkJERKwUEiIiYqWQEBERK4WEiIhYKSRERMRKISEiIlYKCRERsVJIiIiIlUJCRESsFBIiImKlkBARESuFhIiIWCkkRETESiEhIiJWCgkREbFSSIiIiJVCQkRErBQSIiJipZAQERErhYSIiFgpJERExCptSDiOwz333MO1117LnDlz2LlzZ4/x2tpaZs2axTXXXMPzzz8PQEtLC3PnzmX27NncdttttLa2WmsPe/XVV7nkkkuO17xEROQ4SBsSdXV1xONxwuEwt99+O1VVVamx5uZmampqWLduHWvWrGHFihXE43FWrVpFcXExa9euZfz48YTDYWstwIcffsjjjz9OIpHov5mKiEifpQ2JhoYGpkyZAsCECRNoampKjW3evJmJEyfi8/kIBoPk5eWxdevWHo+ZOnUqGzdutNa2t7dz7733Ul5e3uumq6ur
KSgooKCggOrq6j5OWUREesubriAajRIIBFLLHo+HRCKB1+slGo0SDAZTY36/n2g02mO93+8nEolYa5csWcLcuXM57bTTet10SUkJJSUlva4XEZHPJ+2eRCAQIBaLpZYdx8Hr9R51LBaLEQwGe6yPxWLk5uYetTYjI4PXXnuNRx55hDlz5vDJJ5+wYMGC4zY5ERH5YtKGRFFREfX19QA0NjYSCoVSY4WFhTQ0NNDe3k4kEmHHjh2EQiGKiopYv349APX19UyaNOmotYWFhTz33HPU1NRQU1PDkCFD+OlPf9pPUxURkb5Ke7hpxowZvPTSS1x33XUYY6isrOSJJ54gLy+P6dOnM2fOHGbPno0xhgULFpCZmcn8+fMpKyujtraWYcOGsXz5cnJyco5aKyIiJy+XMcYMdBMng6dee5+OZM9NkeFxcfUFZw9QRyIiA08X04mIiJVCQkRErBQSIiJipZAQERErhYSIiFgpJERExEohISIiVgoJERGxUkiIiIiVQkJERKwUEiIiYqWQEBERK4WEiIhYKSRERMRKISEiIlYKCRERsVJIiIiIlUJCRESsFBIiImKlkBARESuFhIiIWCkkRETESiEhIiJWCgkREbFSSIiIiJVCQkRErBQSIiJipZAQERErhYSIiFh50xU4jkN5eTnbtm3D5/NRUVHBmDFjUuO1tbWsW7cOr9fL/PnzmTZtGi0tLZSWltLW1sbo0aNZunQp2dnZR61tbm6mtLSUjo4ORo0aRVVVFdnZ2f06aRER6Z20exJ1dXXE43HC4TC33347VVVVqbHm5mZqampYt24da9asYcWKFcTjcVatWkVxcTFr165l/PjxhMNha+3q1au56qqrWLt2Leeccw7hcLhfJywiIr2XNiQaGhqYMmUKABMmTKCpqSk1tnnzZiZOnIjP5yMYDJKXl8fWrVt7PGbq1Kls3LjRWrto0SKuuOIKHMfhww8/ZMSIEf00VRER6au0IRGNRgkEAqllj8dDIpFIjQWDwdSY3+8nGo32WO/3+4lEItZal8tFMpmkuLiYV155haKiorRNV1dXU1BQQEFBAdXV1b2frYiI9Ena9yQCgQCxWCy17DgOXq/3qGOxWIxgMJhan5WVRSwWIzc311oLkJGRwe9//3s2btxIWVkZTz755DF7KikpoaSkpG8zFRGRPku7J1FUVER9fT0AjY2NhEKh1FhhYSENDQ20t7cTiUTYsWMHoVCIoqIi1q9fD0B9fT2TJk2y1paXl7Np0yagc+/C5XL1xzxFRORzcBljzLEKDp/dtH37dowxVFZWUl9fT15eHtOnT6e2tpZwOIwxhnnz5jFz5kz27dtHWVkZsViMYcOGsXz5cnJyco5au2PHDsrLywFwu93cc889jBs37kTMvYenXnufjmTPTZHhcXH1BWef8F5ERE4WaUPiVKGQEBH5LF1MJyIiVgoJERGxUkiIiIiVQkJERKwUEiIiYqWQEBERK4WEiIhYKSRERMRKISEiIlYKCRERsVJIiIiIlUJCRESsFBIiImKlkBARESuFhIiIWCkkRETESiEhIiJWCgkREbFSSIiIiJVCQkRErBQSIiJipZAQERErhYSIiFgpJERExEohISIiVgoJERGxUkiIiIiVQkJERKwUEiIiYqWQEBERK2+6AsdxKC8vZ9u2bfh8PioqKhgzZkxqvLa2lnXr1uH1epk/fz7Tpk2jpaWF0tJS2traGD16NEuXLiU7O/uotbt372bRokUkk0mMMSxZsoT8/Px+nbSIiPRO2j2Juro64vE44XCY22+/naqqqtRYc3MzNTU1rFu3jjVr1rBixQri8TirVq2iuLiYtWvXMn78eMLhsLX2oYce4vrrr6empoZ58+axYsWKfp2wiIj0XtqQaGhoYMqUKQBMmDCBpqam1NjmzZuZOHEiPp+PYDBIXl4eW7du7fGYqVOnsnHjRmttWVkZl1xyCQDJZJLMzMy0TVdXV1NQUEBBQQHV1dWfa+Ii
IpJe2sNN0WiUQCCQWvZ4PCQSCbxeL9FolGAwmBrz+/1Eo9Ee6/1+P5FIxFo7fPhwAP7+97+zbNkyHnnkkbRNl5SUUFJS0vtZiojI55J2TyIQCBCLxVLLjuPg9XqPOhaLxQgGgz3Wx2IxcnNzrbUAmzZt4uabb+aBBx7Q+xEiIieRtCFRVFREfX09AI2NjYRCodRYYWEhDQ0NtLe3E4lE2LFjB6FQiKKiItavXw9AfX09kyZNstZu2rSJ+++/n5///Od85Stf6adpiojI5+EyxphjFRw+u2n79u0YY6isrKS+vp68vDymT59ObW0t4XAYYwzz5s1j5syZ7Nu3j7KyMmKxGMOGDWP58uXk5OQctfaKK64gHo8zatQoAMaOHcuSJUtOyOS7e+q19+lI9twUGR4XV19w9gnvRUTkZJE2JE4VCgkRkc/SxXQiImKlkBARESuFhIiIWCkkRETESiEhIiJWCgkREbFSSIiIiJVCQkRErBQSIiJipZAQERErhYSIiFgpJERExEohISIiVgoJERGxUkiIiIiVQkJERKwUEiIiYqWQEBERK4WEiIhYKSRERMRKISEiIlYKCRERsVJIiIiIlUJCRESsFBIiImKlkBARESuFhIiIWCkkRETESiEhIiJWaUPCcRzuuecerr32WubMmcPOnTt7jNfW1jJr1iyuueYann/+eQBaWlqYO3cus2fP5rbbbqO1tdVae9gvfvELHnzwweM1LxEROQ7ShkRdXR3xeJxwOMztt99OVVVVaqy5uZmamhrWrVvHmjVrWLFiBfF4nFWrVlFcXMzatWsZP3484XDYWtvW1kZpaSlr167t14n2VTzhDHQLIiIDLm1INDQ0MGXKFAAmTJhAU1NTamzz5s1MnDgRn89HMBgkLy+PrVu39njM1KlT2bhxo7W2vb2dK6+8khtvvLGfpth3b++Nct//eZMde6MD3YqIyIBKGxLRaJRAIJBa9ng8JBKJ1FgwGEyN+f1+otFoj/V+v59IJGKtHTJkCJMnT+5T09XV1RQUFFBQUEB1dXWfHtsb7+8/hAHe3H3wuD+3iMhg4k1XEAgEiMViqWXHcfB6vUcdi8ViBIPB1PqsrCxisRi5ubnW2s+jpKSEkpKSz/XY3jjY2gHAux/H0lSKiPxjS7snUVRURH19PQCNjY2EQqHUWGFhIQ0NDbS3txOJRNixYwehUIiioiLWr18PQH19PZMmTbLWnowOh8SuA620xpMD3I2IyMBJuycxY8YMXnrpJa677jqMMVRWVvLEE0+Ql5fH9OnTmTNnDrNnz8YYw4IFC8jMzGT+/PmUlZVRW1vLsGHDWL58OTk5OUetPRkdbOs8nOYYaHz/ABeNGzHAHYmIDAyXMcYMdBMng6dee5+OZOemWPqHLUS6gqL0shC3/Ou5A9maiMiA0cV0R0g6hmhbgpGBzr2cV9/dP8AdiYgMnLSHm0410fYEBjhjaBZg+Ot7+3Ecg9vtGujWREROOO1JHOHwm9a5WRmMGeEn0pZg+97IAHclIjIwFBJH+CQVEl7GjsgB4DUdchKRU5RC4ggH27pCIjuDfxrpB+DVd1sGsiURkQGjkDjCwdbOs5pyszIYFczkrGHZ/KHpI3YdaB3gzkRETjyFxBG670m4XS5uuzREPOHwsz9tH+DOREROPIXEEVIhkdV54tdVE8+k4LQgz/z1A97aozewReTUopA4wsHWBDk+D16PG6/Hxf9+YzdfHzcCx8Bt4Uaeeu19nn1910C3KSJyQigkjnCwrYMh2Rmp5UTS4ZzRAcYMz+HN3QfZ+lGERFKfNSEipwaFRDdtHUniCYfcrIwe610uF8WFZ+ACftu4WyEhIqcMhUQ3qQvpsj97IfqZw7L5Wv5w9kXbWb+9+US3JiIyIBQS3Ry+++uRexKHXTb+dIKZXuq27OXdffqsCRH5x6eQ6Kb7LTmOJivDw3/7ypdIOIarH3uZl3d8fCLbExE54RQS3Xx6jYT9voeFZw2huPBL
tMTi/Pefb+LnG/5+otoTETnhFBLddL+QzsblcnFJaBS18/6F0cEsKv7vFv7fB5+cqBZFRE4ohUQ33W/Jkc6kMcNZcc1XAfjx7/6GPrtJRP4RKSS6+aS1A6/bRY7Pc8w6r8fFs6/vYteBVsZ/KZf/fLeFe377pi6yE5F/OAqJLsYYmiPtjAxk4nKl/4ChRNKhI2n45nmn43bB7zbv5qNPWrVHISL/UBQSXQ60dhBPOowKZvbpcSODmVyUP4L9hzpY9h/b+Frln/lZ3XaSjsJCRAY/hUSXPQfbARjdx5AAmHn+6Vzx1TMoPGsICcfws7q3+MH/fJVPDnUc7zZFRE4ofcZ1l70H2wAYnZvV58d63W7+JX8E0/55FIfiSf7Xxp28sK2Zr1f9mRGBTHKzvJT9139myrmjjnfbIiL9SnsSXfZEPv+eRHc+j5s5F41hWsEo3C4XH+w/RNPug/yPx/+TFX/SYSgRGVy0J9Flz8E23C4YEfB94edyu1zMGH86M8afjjGGjw628eu/7mLln9/i2dc/YHQwi2E5PiafM4IZ553OmUOzj8MMRESOP5fR6TgYYxh/z3P4M738aEYotT7b5yaRNHQke26iz7M+1p6k9rX3eWtPlPZEku47FDPPO43Kq77CiMAX24sRETnetCcBNEfbae1IMnakv99+RqbXzXX/JQ/oDKWDbQm274nwzr4Yz725h7++d4A7Zhbgcbk40NrB6GAm40YFOC03E5/XTVaGhwyPjg6KyImlkADe3hMFvvj7Eb3lcrkYkp3B1NBIJp87kjOHZvEfTXu48+nN1sf4PG6+9/Ux3Dr9XIK9uCJcROR4UEgAbzd3hUTuiT/c4ziGi88ZxbhRQbbviZDt8zAk28uB1gQffdJKtD2B4xh2f9LGv294h9807ubKCWeQN8KP3+fh3Y8PsWt/K/80IoeiMcP46tlDCWTqr1VEjg/9awK8ldqT6Pvpr8fL6UOyOH1I588/2nsbHUmHTX//mLote/j3De9Yn8ftgoLTc5lw9hDOGJLN6K7DVUmncyyQ6SWQ5SWYmYE/08MIfya52d5eXWUuIqcehQTw1t4ILmDkSfzGcYbHzWXnncZF+SPYdaCVlliceNJhhD+T03J97InEeac5yrsfH+KtPRG2fHiw188dyPRyxtAszhyazRldf84als1puZ1nYQWzvMQTDtH2BD6vm1GBTIbmZChYRE4BaUPCcRzKy8vZtm0bPp+PiooKxowZkxqvra1l3bp1eL1e5s+fz7Rp02hpaaG0tJS2tjZGjx7N0qVLyc7O7lPtifT23ijD/D583pP/jeFsn4cxI/yMGeHvts7NcH8m54wKAJBwHPZF4rQnEhxsS9CRcHC5XBhjaEs4JB3DoXiSQ/EE0fYE+w918F7LIbZ37VH1RobHxchAJqOCmYw6/LXrz7AcH63xJAda47hwMSQng0Cml46kQzzhMCQ7gy917eUMyc4g0+vGMXAonqA1nuzqLUlrR4K2Dofhfh9jRuSQ4/OSSDq0JRz8Pk+PkDLGKLRE+kHakKirqyMejxMOh2lsbKSqqopHH30UgObmZmpqanjmmWdob29n9uzZXHzxxaxatYri4mJmzZrF6tWrCYfDXH755b2u/f73v9/f8+btvRF+sfFdko5hXzTOl7+US4an5z8yXrcbMIAzqNZneDycPTybrAwPCceQSPast603BvYfirMv2s7+Qx0cbO3gUDxJeyKJ1+0iw+Mm6RgibQkOtnUQbU+w5cODbE5+sbOoPW5Xry4y9HndxBOdPfs8bkbnZuJywf5YZy8etyvVp9fT+TXD7cLjceHClXqOQKaX7IzOO/06xmDo3HyHv3eMwZjOLeoCXK7Oa1/crs6TDtxdy67U10/XHa5x0bXsJvWzu385HGiHf+Ncru7fdxvrWunu8XMP/5zO5XR6c5J711b4QlLzPNrYMdo0XdveMZ1hf/h7p6txF67U9jm83V1dG+cz63vRp+HTbWIw3b7v
vq1Mqrej1R6e06f9dP2ddPVkuv0+mcO/T6bzOVLzOeJ3xeWCjqQhnnAwGDK9bjxuFx0JQzzp4HJBpteD1+0i4XT+x++GKfn9elZm10Y4tsrKSvO73/0utTx58uTU93V1dWbx4sWp5Ztuusm88cYb5sorrzR79+41xhizZcsWc8MNN/SpNp2VK1eaUChkQqGQWblyZdr63jhezzMQBnPvxqj/gTSYezdG/Z8IaY+vRKNRAoFAatnj8ZBIJFJjwWAwNeb3+4lGoz3W+/1+IpFIn2rTKSkpYdu2bWzbto2SkpJexuGxPfzww8fleQbCYO4d1P9AGsy9g/o/EdKGRCAQIBaLpZYdx8Hr9R51LBaLEQwGe6yPxWLk5ub2qVZERE4OaUOiqKiI+vp6ABobGwmFPr1tRWFhIQ0NDbS3txOJRNixYwehUIiioiLWr18PQH19PZMmTepTrYiInBw85eXl5ccqyM/PZ8OGDTz22GNs2LCB8vJyfvvb39LS0sL555+P1+tlyZIlPPvss9xyyy0UFBRw3nnnsWrVKn71q19x4MAB7rjjDoYOHdrr2oyMgbmi+Gtf+9qA/NzjYTD3Dup/IA3m3kH99zfd4E9ERKxO/gsDRERkwCgkRETESiEhIiJWCgkREbFSSIiIiJVCQkRErE7pW4Wnu8PtQHrjjTd48MEHqampYefOndx11124XC7OPfdc7r33XtxuNw8//DAvvPACXq+XRYsWUVhY2Kfa462jo4NFixaxa9cu4vE48+fP55xzzhkUvQMkk0nuvvtu3nnnHTweD0uXLsUYM2j6P+zjjz9m1qxZPP7443i93kHV/5VXXpm6Tc9ZZ53Ftddey/3334/H42Hy5Mnccsst1tdtY2Njr2v7w2OPPcZf/vIXOjo6+O53v8uFF144qLa91QDfO2pAPffcc6asrMwYY8zrr79ubrzxxgHuqNPq1atNcXGxufrqq40xxsybN89s2rTJGGPM4sWLzR//+EfT1NRk5syZYxzHMbt27TKzZs3qc+3x9vTTT5uKigpjjDEtLS3mkksuGTS9G2PMn/70J3PXXXcZY4zZtGmTufHGGwdV/8YYE4/HzU033WQuu+wy8/bbbw+q/tva2sy3v/3tHuuuuOIKs3PnTuM4jvnhD39ompqarK/bvtQeb5s2bTLz5s0zyWTSRKNRs3LlykG17Y/llN6TaGhoYMqUKQBMmDCBpqamAe6oU15eHtXV1dx5550AvPnmm1x44YUATJ06lZdeeomxY8cyefJkXC4XZ5xxBslkkpaWlj7VDh8+/Lj2/c1vfpOZM2emlj0ez6DpHeDSSy/lG9/4BgC7d+9m5MiRvPDCC4Omf4Bly5Zx3XXXsXr1amDw/O4AbN26ldbWVubOnUsikaCkpIR4PE5eXh4AkydP5uWXX6a5ufkzr9toNNrr2v7w4osvEgqFuPnmm4lGo9x5553U1tYOmm1/LKf0exLHusPtQJo5c2bqJorQ8wN1ut9Vt3vvh9f3pfZ48/v9BAIBotEot956K7fddtug6f0wr9dLWVkZP/7xj5k5c+ag6v/Xv/41w4cPT/2jCIPndwcgKyuLH/zgB6xZs4b77ruPhQsX9vgAMltPHo/H2ueJeo3v37+fpqYmHnroIe677z5KS0sH1bY/llN6T+JYd7g9mbjdn2Z5urvq9qW2P3z44YfcfPPNzJ49m29961v85Cc/GTS9H7Zs2TJKS0u55ppraG9vHzT9P/PMM7hcLl5++WW2bNlCWVkZLS0tg6b/sWPHMmbMGFwuF2PHjiUYDHLgwIHP9NTW1vaZ1+3R+rTV9sdrfOjQoeTn5+Pz+cjPzyczM5OPPvroM/2crNv+WE7pPYlj3eH2ZDJ+/HheeeUVoPNOuRdccAFFRUW8+OKLOI7D7t27cRyH4cOH96n2eNu3bx9z587ljjvu4Dvf+c6g6h3gN7/5DY899hgA2dnZuFwuzj///EHT/y9/+UuefPJJampq+PKXv8yy
ZcuYOnXqoOn/6aefpqqqCoA9e/bQ2tpKTk4O7733HsYYXnzxxVRPR75uA4EAGRkZvartD5MmTWLDhg0YY1K9X3TRRYNm2x/LKX2Dv8NnPmzfvh1jDJWVlYwbN26g2wLggw8+4Ec/+hG1tbW88847LF68mI6ODvLz86moqMDj8VBdXU19fT2O47Bw4UIuuOCCPtUebxUVFfzhD38gPz8/te7f/u3fqKioOOl7Bzh06BALFy5k3759JBIJbrjhBsaNGzcotv2R5syZQ3l5OW63e9D0H4/HWbhwIbt378blclFaWorb7aayspJkMsnkyZNZsGCB9XXb2NjY69r+8MADD/DKK69gjGHBggWcddZZg2bbH8spHRIiInJsp/ThJhEROTaFhIiIWCkkRETESiEhIiJWCgkREbFSSIiIiJVCQkRErP4/98PydpiifvwAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.distplot(bcback(predict,maxlog))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assemble the result table: one SaleID per prediction, with the prediction passed through bcback.\n",
    "# NOTE(review): the SaleID values are hard-coded as 150000..199999, while the prediction cell\n",
    "# selects the test rows with data.SaleID>=200000 -- confirm the ids match the predicted rows.\n",
    "result_lgb=pd.DataFrame()\n",
    "result_lgb[\"SaleID\"]=range(150000,200000)\n",
    "result_lgb[\"price\"]=bcback(predict,maxlog)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    50000.000000\n",
       "mean      2107.124008\n",
       "std       4182.227546\n",
       "min         39.100829\n",
       "25%        122.136307\n",
       "50%        186.690342\n",
       "75%       2121.491647\n",
       "max      62813.532771\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_lgb[\"price\"].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "result_lgb.to_csv(\"result_lgb.csv\",index=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.2 Model-CATBOOST"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Params tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████| 2/2 [58:19<00:00, 1749.51s/trial, best loss: 0.22414662417918302]\n"
     ]
    }
   ],
   "source": [
    "def f_cab(params):\n",
    "    \"\"\"Hyperopt objective: 5-fold cross-validated MAE of a CatBoost model built from params.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    params : dict\n",
    "        Keyword arguments for cab.CatBoostRegressor, sampled from the search space.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        'loss' is the mean fold MAE rounded to 2 decimals, 'status' is STATUS_OK.\n",
    "    \"\"\"\n",
    "    # shuffle=True is required for random_state to take effect (and to be accepted by recent scikit-learn)\n",
    "    kf = KFold(n_splits=5, shuffle=True, random_state=42)\n",
    "    MAE=[]\n",
    "    train_x=X_train\n",
    "    # Positions of the categorical columns inside the frame that is passed to fit\n",
    "    cat_idx=[train_x.columns.get_loc(c) for c in cat_feat]\n",
    "    # Split into the training and validation part of each fold\n",
    "    for train, test in kf.split(train_x):\n",
    "        kf_X_tr = train_x.iloc[train]  # fold training features\n",
    "        kf_y_tr = bc_y_train[train]    # fold training targets\n",
    "        kf_X_val =train_x.iloc[test]   # fold validation features\n",
    "        kf_y_val = bc_y_train[test]    # fold validation targets\n",
    "        # Train CatBoost with the sampled hyper-parameters (a fresh model per fold),\n",
    "        # so that the returned loss actually depends on params.\n",
    "        clf = cab.CatBoostRegressor(**params)\n",
    "        clf.fit(kf_X_tr,kf_y_tr,cat_features=cat_idx,verbose=0)\n",
    "        result=clf.predict(kf_X_val)\n",
    "        # bcback presumably inverts the Box-Cox transform so the MAE is on the price scale -- TODO confirm\n",
    "        MAE.append(mean_absolute_error(bcback(kf_y_val,maxlog),bcback(result,maxlog)))\n",
    "    return {'loss': round(np.mean(MAE),2), 'status': STATUS_OK}\n",
    "\n",
    "# Other available distributions: hp.normal, hp.lognormal, hp.quniform\n",
    "# Note: for hp.choice entries fmin returns the index of the chosen option, not the option itself.\n",
    "space_cab = {\n",
    "        \"loss_function\":'MAE',\n",
    "        \"verbose\":0,\n",
    "        \"n_estimators\":100,\n",
    "        'depth': hp.choice('depth', list(range(4,15))),\n",
    "        \"learning_rate\":hp.uniform(\"learning_rate\",0.01,0.1),\n",
    "        \"l2_leaf_reg\":hp.uniform(\"l2_leaf_reg\",0,0.05)}\n",
    "# This rebinds trials, replacing the Trials object of the LightGBM search.\n",
    "trials=Trials()\n",
    "best_cab = fmin(f_cab, space_cab, algo=tpe.suggest, max_evals=2, trials=trials)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [],
   "source": [
    "best_cab ={'learning_rate': 0.06678548500225957, 'depth': 10, 'l2_leaf_reg': 0.011592262655679031,\"loss_function\":'MAE', \"verbose\":0,}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['name',\n",
       " 'model',\n",
       " 'brand',\n",
       " 'bodyType',\n",
       " 'fuelType',\n",
       " 'gearbox',\n",
       " 'notRepairedDamage',\n",
       " 'regionCode',\n",
       " 'reg_time',\n",
       " 'used_time_cut']"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cat_feat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Positions of the categorical columns inside X_train (the frame passed to fit),\n",
    "# not inside data, which still contains the price column.\n",
    "cat_feat_index=[X_train.columns.tolist().index(i) for i in cat_feat]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<catboost.core.CatBoostRegressor at 0x25f8ea20648>"
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf = cab.CatBoostRegressor(**best_cab )\n",
    "clf.fit(X_train, bc_y_train,cat_features=cat_feat_index,verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_x=data[data.SaleID>=200000].drop(label,axis=1)\n",
    "predict=clf.predict(test_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x25f8ea36c08>"
      ]
     },
     "execution_count": 159,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAD+CAYAAADPjflwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHvNJREFUeJzt3Xt0VOXdL/DvntkzucxMIFwCtTA0QScaMYYJr6AHQnmRpmcZqbIqIEesh9YFEWOlxDdCFSPGEKzQV2Lx4BHpOrE0iaLV06MHy1EzQIDilIBRCCVY7kJCRGaGzHU/548kA9E8TAIkmZjvZy0X2fv5zczv2Th8s2dfRhFCCBAREXVA19sNEBFR9GJIEBGRFEOCiIikGBJERCTFkCAiIimGBBERSTEkiIhIiiFBRERSDAkiIpJiSBARkRRDgoiIpBgSREQkpUYq0DQNhYWFqKurg9FoRFFREUaNGhUer6ysRHl5OVRVRW5uLqZMmYKmpibk5+fD6/UiKSkJK1asQFxcXIe1586dQ3Z2Nmw2GwDgzjvvxC9+8YvumzEREXWeiGDz5s2ioKBACCHEnj17xIIFC8JjZ86cETk5OcLn84nz58+Hf37uuefEpk2bhBBCrFu3TmzYsEFau337drF8+fJIbRARUS+I+HGT0+nEpEmTAAAZGRmora0Nj+3btw9jx46F0WiExWKB1WrFgQMH2j0mKysL1dXV0tra2lp8/vnneOCBB/DYY4/hzJkzEYOttLQUqampSE1NRWlp6ZXmIxERRRDx4ya32w2z2Rxe1uv1CAaDUFUVbrcbFoslPGYymeB2u9utN5lMcLlc0tqUlBSMGTMGd9xxB9577z0UFRVhzZo1l+0pLy8PeXl5XZ4sERF1TcQ9CbPZDI/HE17WNA2qqnY45vF4YLFY2q33eDxISEiQ1k6YMAHjx48HAEybNg1ffPHFtZkZERFdtYghYbfb4XA4AAA1NTXhA8wAkJ6eDqfTCZ/PB5fLhfr6ethsNtjtdlRVVQEAHA4HMjMzpbVPPfUUNm/eDADYsWMHbr755u6YJxERXQFFiMt/fWnb2U0HDx6EEALFxcVwOBywWq2YOnUqKisrUVFRASEE5s+fj+zsbDQ2NqKgoAAejweJiYlYtWoV4uPjO6w9duwYli5dCgCIi4tDUVERkpKSemTyRER0eRFDoj94Z88JBENau3WqXod7x/6wlzoiIooOEQ9c9wfBkIZA6NtZqXVYS0TUn/CKayIikmJIEBGRFEOCiIikGBJERCTFkCAiIimGBBERSTEkiIhIiiFBRERSDAkiIpJiSBARkRRDgoiIpBgSREQkxZAgIiIphgQREUkxJIiISIohQUREUgwJIiKSYkgQEZEUQ4KIiKQYEkREJMWQICIiKYYEERFJMSSIiEiKIUFERFIMCSIikmJIEBGRFEOCiIikGBJERCTFkCAiIimGBBERSTEkiIhIiiFBRERSDAkiIpKKGBKapmHZsmWYNWsW5s6diyNHjrQbr6ysxIwZMzBz5kx8/PHHAICmpibMmzcPc+bMweOPP47m5mZpbZvdu3dj8uTJ12peRER0DUQMiS1btsDv96OiogKLFy9GSUlJeKyhoQFlZWUoLy/H+vXrsXr1avj9fqxduxY5OTnYuHEj0tLSUFFRIa0FgFOnTuH1119HMBjsvpkSEVGXRQwJp9OJSZMmAQAyMjJQW1sbHtu3bx/Gjh0Lo9EIi8UCq9WKAwcOtHtMVlYWqqurpbU+nw/PPPMMCgsLu2eGRER0xSKGhNvthtlsDi/r9frwb/xutxsWiyU8ZjKZ4Ha72603mUxwuVzS2uXLl2PevHkYNmxYp5suLS1FamoqUlNTUVpa2unHERFR16iRCsxmMzweT3hZ0zSoqtrhmMfjgcViCa+PjY2Fx+NBQkJCh7UGgwGffvopjh49ij/84Q/45ptvsGjRIvz+97+/
bE95eXnIy8vr8mSJiKhrIu5J2O12OBwOAEBNTQ1sNlt4LD09HU6nEz6fDy6XC/X19bDZbLDb7aiqqgIAOBwOZGZmdlibnp6OzZs3o6ysDGVlZRgwYEDEgCAiop4TcU9i2rRp2L59O2bPng0hBIqLi7FhwwZYrVZMnToVc+fOxZw5cyCEwKJFixATE4Pc3FwUFBSgsrISiYmJWLVqFeLj4zusJSKi6KUIIURvN9Hb3vz0GAKh9pvBoFdw37iRvdQREVF04MV0REQkxZAgIiIphgQREUkxJIiISIohQUREUgwJIiKSYkgQEZEUQ4KIiKQYEkREJMWQICIiKYYEERFJMSSIiEiKIUFERFIMCSIikmJIEBGRFEOCiIikGBJERCTFkCAiIimGBBERSTEkiIhIiiFBRERSDAkiIpJiSBARkRRDgoiIpBgSREQkxZAgIiIphgQREUkxJIiISIohQUREUgwJIiKSYkgQEZEUQ4KIiKQYEkREJMWQICIiKTVSgaZpKCwsRF1dHYxGI4qKijBq1KjweGVlJcrLy6GqKnJzczFlyhQ0NTUhPz8fXq8XSUlJWLFiBeLi4jqsbWhoQH5+PgKBAIYOHYqSkhLExcV166SJiKhzIu5JbNmyBX6/HxUVFVi8eDFKSkrCYw0NDSgrK0N5eTnWr1+P1atXw+/3Y+3atcjJycHGjRuRlpaGiooKae2rr76Ke++9Fxs3bsT111+PioqKbp0wERF1XsSQcDqdmDRpEgAgIyMDtbW14bF9+/Zh7NixMBqNsFgssFqtOHDgQLvHZGVlobq6Wlq7dOlSTJ8+HZqm4dSpUxg8eHA3TZWIiLoqYki43W6Yzebwsl6vRzAYDI9ZLJbwmMlkgtvtbrfeZDLB5XJJaxVFQSgUQk5ODnbt2gW73R6x6dLSUqSmpiI1NRWlpaWdny0REXVJxGMSZrMZHo8nvKxpGlRV7XDM4/HAYrGE18fGxsLj8SAhIUFaCwAGgwHvv/8+qqurUVBQgDfeeOOyPeXl5SEvL69rMyUioi6LuCdht9vhcDgAADU1NbDZbOGx9PR0OJ1O+Hw+uFwu1NfXw2azwW63o6qqCgDgcDiQmZkprS0sLMTOnTsBtOxdKIrSHfMkIqIroAghxOUK2s5uOnjwIIQQKC4uhsPhgNVqxdSpU1FZWYmKigoIITB//nxkZ2ejsbERBQUF8Hg8SExMxKpVqxAfH99hbX19PQoLCwEAOp0Oy5Ytw+jRo3ti7mFvfnoMgVD7zWDQK7hv3Mge7YOIKNpEDIn+gCFBRNQxXkxHRERSDAkiIpJiSBARkRRDgoiIpBgSREQkxZAgIiIphgQREUkxJIiISIohQUREUgwJIiKSYkgQEZEUQ4KIiKQYEkREJMWQICIiKYYEERFJMSSIiEiKIUFERFIMCSIikmJIEBGRFEOCiIikGBJERCTFkCAiIimGBBERSTEkiIhIiiFBRERSDAkiIpJiSBARkRRDgoiIpBgSREQkxZAgIiIphgQREUkxJIiISIohQUREUgwJIiKSYkgQEZGUGqlA0zQUFhairq4ORqMRRUVFGDVqVHi8srIS5eXlUFUVubm5mDJlCpqampCfnw+v14ukpCSsWLECcXFxHdaePHkSS5cuRSgUghACy5cvR0pKSrdOmoiIOifinsSWLVvg9/tRUVGBxYsXo6SkJDzW0NCAsrIylJeXY/369Vi9ejX8fj/Wrl2LnJwcbNy4EWlpaaioqJDWvvTSS3jggQdQVlaG+fPnY/Xq1d06YSIi6ryIIeF0OjFp0iQAQEZGBmpra8Nj+/btw9ixY2E0GmGxWGC1WnHgwIF2j8nKykJ1dbW0tqCgAJMnTwYAhEIhxMTEdMc8iYjoCkQMCbfbDbPZHF7W6/UIBoPhMYvFEh4zmUxwu93t1ptMJrhcLmntoEGDYDAYcPjwYaxcuRILFy6M2HRpaSlSU1ORmpqK0tLSzs+WiIi6JOIxCbPZDI/HE17WNA2qqnY4
5vF4YLFYwutjY2Ph8XiQkJAgrQWAnTt34tlnn8ULL7zQqeMReXl5yMvL6/wsiYjoikTck7Db7XA4HACAmpoa2Gy28Fh6ejqcTid8Ph9cLhfq6+ths9lgt9tRVVUFAHA4HMjMzJTW7ty5E88//zxee+013HLLLd00TSIiuhKKEEJcrqDt7KaDBw9CCIHi4mI4HA5YrVZMnToVlZWVqKiogBAC8+fPR3Z2NhobG1FQUACPx4PExESsWrUK8fHxHdZOnz4dfr8fQ4cOBQAkJydj+fLlPTL5Nm9+egyBUPvNYNAruG/cyB7tg4go2kQMif6AIUFE1DFeTEdERFIMCSIikmJIEBGRFEOCiIikGBJERCTFkCAiIimGBBERSTEkiIhIiiFBRERSDAkiIpJiSBARkRRDgoiIpBgSREQkxZAgIiIphgQREUkxJIiISIohQUREUgwJIiKSYkgQEZEUQ4KIiKQYEkREJMWQICIiKYYEERFJMSSIiEiKIUFERFIMCSIikmJIEBGRFEOCiIikGBJERCTFkCAiIimGBBERSTEkiIhIiiFBRERSDAkiIpKKGBKapmHZsmWYNWsW5s6diyNHjrQbr6ysxIwZMzBz5kx8/PHHAICmpibMmzcPc+bMweOPP47m5mZpbZs//vGPePHFF6/VvIiI6BqIGBJbtmyB3+9HRUUFFi9ejJKSkvBYQ0MDysrKUF5ejvXr12P16tXw+/1Yu3YtcnJysHHjRqSlpaGiokJa6/V6kZ+fj40bN3brRImIqOsihoTT6cSkSZMAABkZGaitrQ2P7du3D2PHjoXRaITFYoHVasWBAwfaPSYrKwvV1dXSWp/Ph3vuuQcLFizopikSEdGVihgSbrcbZrM5vKzX6xEMBsNjFoslPGYymeB2u9utN5lMcLlc0toBAwZg4sSJXWq6tLQUqampSE1NRWlpaZceS0REnadGKjCbzfB4POFlTdOgqmqHYx6PBxaLJbw+NjYWHo8HCQkJ0torkZeXh7y8vCt6LBERdV7EPQm73Q6HwwEAqKmpgc1mC4+lp6fD6XTC5/PB5XKhvr4eNpsNdrsdVVVVAACHw4HMzExpLRERRa+IexLTpk3D9u3bMXv2bAghUFxcjA0bNsBqtWLq1KmYO3cu5syZAyEEFi1ahJiYGOTm5qKgoACVlZVITEzEqlWrEB8f32EtERFFL0UIIXq7id725qfHEAgJnG8O4OBpFxrdPniDGl75b3YMNjPIiKj/irgn0Z9UfnoMhxsvHjd503kcCyaP7sWOiIh6F6+4vsQ3zQHEGfR4cMIoAEB1/dle7oiIqHcxJC7RHAjBHKvixh8kYFhCDHZ/2QR/UOvttoiIeg1DopUQAt5ACHEGPQBg9FAzmgMh7Dt+rpc7IyLqPQyJVv6gBk0gHBLXJ7VcQMiPnIioP2NItGoOhAAAccaWkEgZYoKiADsYEkTUjzEkWrWFRGzrnoQpRsVNwxPgPPo1vK1jRET9DUOiVXhPwnBxk9wxejD8QQ3/OPJ1b7VFRNSrGBKtvP6Ws5jajkkAwO2jBwMAdhzmR05E1D8xJFp9+5gEANyWPAh6nYIt+8+AF6YTUX/EkGj17WMSAGCJNWDqjUnYf+o8PjvxTW+1RkTUaxgSrZr9bcck9O3WzxlvBQBs3HW0x3siIuptDIlW3g4+bgKASTcMxQ8HxuG9vSfh8gZ6ozUiol7DkGh18eymlpBQ9Qre2XMCb//jOG75YQIu+ENY9u7neGfPid5sk4ioRzEkWrV93HTpMYlgSEMgJJBhTYSu9cI6N/cmiKgfYUi0ag6EoFOAGPW7myQh1oC0HyTgq/NeFP7vL/DQhr/jyFlPB89CRPT9wpBo5Q2EEGvQQ1GUDsfvHTsC/35jEpIsMfikrgHP/5/9PdwhEVHPY0i0am4NCZk4ox533jQMv5lmg22YGZ/UNeA8P3oiou85hkSrZn/oO6e/yky/9Tr4Qxo2137VzV0REfUuhgSAQEhDUBPf
Of1V5u5brwMAvLf3ZHe2RUTU6xgS6Phq68sZNdiEW0cORHX9WTS6fd3ZGhFRr2JIQH619eVMv/U6hDSB9z871V1tERH1OrW3G4gGXQmJtovshBBQAPznln+i+tBZJA814T+yU6VnRxER9UUMCXR8B9jLCYY0xBtV3PzDAag98Q3+7+ctB7B1CvBE9o3d1icRUU9jSODSYxJd+/Tt/n8biXM3D8epb7z4oPYU/vBxPW4cnhA+sE1E1NfxmASu7JgEACiKgkSTEWnXJeChO34Ec4yKJ97aix31Z/n9E0T0vcCQQNc/burI8AGx+M9ZGfAFNdz/P3di6uoqrPl//4TbF7xWbRIR9TiGBK58T+JSql6ByxfEryYmI33EABxruoDVfzuI/1LyEd6tOYEvTp6H42ADDp1xX6u2iYi6HY9J4Lu3Cb9SwZCG5CFmJA8xwxcMYes/G+E42IBfl9eEaxQF+MXtP0J+dirMMdz8RBTd+K8UgAvXYE/i22LUlns93T56EKrqGiGEQLxRj5pj5/DH6n9h0z+O447RgzEyMR63jBiArBuGItFkvGavT0R0LTAkcPFb6WKuYUi0GWKOwc8yrkMg1HIge+INQ/FJXQOq6xux+fPT4TqdAqQOT8DwhBgkWWIxZ7wVt44ceM37ISLqCoYEWvYkYlQd9LruvxDOoNdhWtow3H3rcLh9IZz+xov6Bg/qvjqPf552Yf+p8wCATf84jsU/ScUDE6xwHvka/2r0YNrNw/HDgXHd3iMRURtF8FxNZCz/EBDAf/z04oVwcUYdgiER3gPoifVCCOh0wJeNF1D+96M4721/ZpRep+C/jhmOjJEDEQgJDIw3YPqt18HEYxtE1E36bUhomkDTBT+GmGNw41MfINFkRN6/3xAe742QuHT91xcCeP+zU2jy+JE8xIRBJgP2n3LhwFeudvWJ8QY8eHvLNRrHvr6AOIMetyUPgt2aiAFxBuh6YO+IiL6/+m1I/I+qevxucx1emp2BRzfuQfIQEx6elBIe7+2Q6Gi9EED9GTfc/iBURcHRpgvYduhs+Oysjhj1OsQYdIg16BFr0CFW1SPeqIdtmAVjrYlIssTA5QsgEBK4cbgFNw5PgEGvwBvQoCidvzMuEX0/RQwJTdNQWFiIuro6GI1GFBUVYdSoUeHxyspKlJeXQ1VV5ObmYsqUKWhqakJ+fj68Xi+SkpKwYsUKxMXFdam2u+0/dR4/f6Ua3qCGkCaQ9oMEPDDh4ryiMSQ6Wq8oAnuPfQOdomCQyYjmQAiHGzw4fb4Z/pCAP6ghEGr7TyDY+qc/pHW4XQx6BYqiwB/UoOoU2EclYkLyIDS4ffji5HloAkgeYsKowfFIiDXAFKPCHKvCHKNHjKpHUBPQNIGgJhDSNMQZVQxLiMFgUwz0OgU6BVCgAEpLgMUadFAUBZomcK45AKOqa3dqsKYJKAp440Tqd7yBEL5s9CBlqAkxqh4ubwD/a8cR1De4MWvcSIxPGdwjfUQMiQ8//BAfffQRSkpKUFNTg3Xr1uGVV14BADQ0NGDevHnYtGkTfD4f5syZg02bNuGFF15AWloaZsyYgVdffRVGoxF33XVXp2sfeuihnpg7th9qxH/fsBv+kIZ/+1EiZo4bGR6LNbT8gxf81j+m34f1QhM4fq4ZR85eaP3aVh0MOh2ONl3A0aYLEADijXpc8IdwrHUZaDkmogAIatdu59OgVxBvVOHyBtD2tINNRgw2G9Hk8eOsxw+DToeB8QbEG/UICQEhgGEJsRiRGAchgNPnvTjvDWKQyYDBphhoQuCCPwQhBBLiDIg3qnD7gjh3wQ+domBAnAGxBh1c3iBc3iBiDToMiDPCqCo47w22fEuhUY+E2JawavaHEAiJ8N5YcyAElzcITWt5flOMHsGQgC/YsvfVdhJEsDWMVZ0Cg77lulVfUIO/tU6nKBf/BNqtA1r+bFsvBMJ/Dy3vWIFL37nffkzb8wRCAr5ACCEhEKPqEKPq8e1PIAXa
nl+EX0cI+fO3Bf3FvlpqtUv6antsS8C3hHzbXNoeq4nvvp7W+sC2GkX57pza6jTR8ri2nzUhwttaUYBAUCCgtWx/o6qDJgB/6y+FBn3LupAm4Ato0IRAjKqHQW35Bak5oEFBy2nxql7BBX8IF/xBGPQ6mIwqdDrAdcn/K5ZYFYGQwLkLAfiCISTEGpAQp8LlDaLR7UNIExhijkFCrAFnXF6cONcMg16HEYlxiDPocfC0G4cb3S0ffw+3wBvQ8Pd/NcEf1BBn0CNzVCL2HT/X7jjl+ORBePG+WzFyUPyVvfk6S0RQXFws/vrXv4aXJ06cGP55y5Yt4umnnw4vP/LII2Lv3r3innvuEWfOnBFCCLF//37x8MMPd6k2kjVr1gibzSZsNptYs2ZNxPrOPF9f0Bf67As9CtE3+uwLPQrRN/rsCz0KEZ19Rrwth9vthtlsDi/r9XoEg8HwmMViCY+ZTCa43e52600mE1wuV5dqI8nLy0NdXR3q6uqQl5fXyTiUe/nll6/6OXpCX+izL/QI9I0++0KPQN/osy/0CERnnxFDwmw2w+PxhJc1TYOqqh2OeTweWCyWdus9Hg8SEhK6VEtERNEhYkjY7XY4HA4AQE1NDWw2W3gsPT0dTqcTPp8PLpcL9fX1sNlssNvtqKqqAgA4HA5kZmZ2qZaIiKKDvrCwsPByBSkpKdi6dSvWrVuHrVu3orCwEO+++y6ampowZswYqKqK5cuX45133sGjjz6K1NRU3HzzzVi7di3+/Oc/49y5c3jiiScwcODATtcaDIYemv5F48eP7/HXvBJ9oc++0CPQN/rsCz0CfaPPvtAjEH199tvrJIiIKDJ+nwQREUkxJIiISIohQUREUgwJIiKSYkgQEZEUQ4KIiKT69bfVRLrDbXfbu3cvXnzxRZSVleHIkSN48sknoSgKbrjhBjzzzDPQ6XR4+eWX8cknn0BVVSxduhTp6eldqr0agUAAS5cuxYkTJ+D3+5Gbm4vrr78+qvoMhUJ46qmn8OWXX0Kv12PFihUQQkRVj23Onj2LGTNm4PXXX4eqqlHZ4z333BO+Tc6IESMwa9YsPP/889Dr9Zg4cSIeffRR6fumpqam07VXa926dfjoo48QCARw//3347bbbouq7fn222/jnXfeAQD4fD7s378fZWVlUbktI+rle0f1qs2bN4uCggIhhBB79uwRCxYs6LHXfvXVV0VOTo647777hBBCzJ8/X+zcuVMIIcTTTz8tPvzwQ1FbWyvmzp0rNE0TJ06cEDNmzOhy7dV46623RFFRkRBCiKamJjF58uSo6/Nvf/ubePLJJ4UQQuzcuVMsWLAg6noUQgi/3y8eeeQR8ZOf/EQcOnQoKnv0er3iZz/7Wbt106dPF0eOHBGapolf/epXora2Vvq+6Urt1di5c6eYP3++CIVCwu12izVr1kTl9mxTWFgoysvLo3Jbdka//rjJ6XRi0qRJAICMjAzU1tb22GtbrVaUlpaGlz///HPcdtttAICsrCxUV1fD6XRi4sSJUBQF1113HUKhEJqamrpUezV++tOf4te//nV4Wa/XR12fd955J5577jkAwMmTJzFkyJCo6xEAVq5cidmzZyMpKQlAdP59HzhwAM3NzZg3bx4efPBB7N69G36/H1arFYqiYOLEidixY0eH7xu3293p2qu1bds22Gw2LFy4EAsWLMCPf/zjqNyeAPDZZ5/h0KFDuOuuu6JyW3ZGvw6Jy93htrtlZ2eHb5QItNwXv+2LdS69c+6l/bWt70rt1TCZTDCbzXC73Xjsscfw+OOPR2WfqqqioKAAzz33HLKzs6Oux7fffhuDBg0Kv8GB6Pz7jo2NxS9/+UusX78ezz77LJYsWdLuC8Bkr63X66X9dMd77Ouvv0ZtbS1eeuklPPvss8jPz4/K7Qm0fCy2cOHCLm2fntyWndGvj0lc7g63PU2nu5jXke6c25Xaq3Xq1CksXLgQc+bMwd13343f/e53UdnnypUr
kZ+fj5kzZ8Ln80VVj5s2bYKiKNixYwf279+PgoKCdr+lRkOPAJCcnIxRo0ZBURQkJyfDYrHg3Llz33ltr9f7nfdNR/3Iaq/2PTZw4ECkpKTAaDQiJSUFMTEx+Oqrr77z2r29Pc+fP4/Dhw9jwoQJcLvdnd4+PbktO6Nf70lc7g63PS0tLQ27du0C0HI33HHjxsFut2Pbtm3QNA0nT56EpmkYNGhQl2qvRmNjI+bNm4cnnngCP//5z6Oyz7/85S9Yt24dACAuLg6KomDMmDFR1eOf/vQnvPHGGygrK8NNN92ElStXIisrK6p6BIC33noLJSUlAIDTp0+jubkZ8fHxOHr0KIQQ2LZtW/i1v/2+MZvNMBgMnaq9WpmZmdi6dSuEEOE+b7/99qjbnrt378Ydd9wBAF3aPj25LTujX9/gr+1sgYMHD0IIgeLiYowePbrHXv/48eP4zW9+g8rKSnz55Zd4+umnEQgEkJKSgqKiIuj1epSWlsLhcEDTNCxZsgTjxo3rUu3VKCoqwgcffICUlJTwut/+9rcoKiqKmj4vXLiAJUuWoLGxEcFgEA8//DBGjx4ddduyzdy5c1FYWAidThd1Pfr9fixZsgQnT56EoijIz8+HTqdDcXExQqEQJk6ciEWLFknfNzU1NZ2uvVovvPACdu3aBSEEFi1ahBEjRkTd9nzttdegqmr465i7sn16cltG0q9DgoiILq9ff9xERESXx5AgIiIphgQREUkxJIiISIohQUREUgwJIiKSYkgQEZHU/wcxE7oLV8KDVAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.distplot(bcback(predict,maxlog))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Submission frame for the CatBoost model: one row per test SaleID.\n",
    "# bcback(..., maxlog) presumably maps predictions back from the Box-Cox scale - confirm against its definition.\n",
    "result_cab = pd.DataFrame({\"SaleID\": range(150000, 200000)})\n",
    "result_cab = result_cab.assign(price=bcback(predict, maxlog))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    50000.000000\n",
       "mean      2212.614232\n",
       "std       4059.055011\n",
       "min         37.505177\n",
       "25%        227.531552\n",
       "50%        492.054132\n",
       "75%       2151.080121\n",
       "max      72394.819162\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_cab[\"price\"].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_cab.to_csv(\"result_cab.csv\",index=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.2 Model-XGBOOST"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Params tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(\"data_xgb.csv\")\n",
    "X_train = data.loc[data[\"SaleID\"] < 200000].drop(columns=\"price\")\n",
    "# stats.boxcox returns the transformed prices and the fitted lambda (kept in maxlog).\n",
    "bc_y_train, maxlog = stats.boxcox(data.loc[data[\"SaleID\"] < 200000, \"price\"])\n",
    "y_train = bc_y_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████| 2/2 [05:35<00:00, 167.87s/trial, best loss: 370.83]\n"
     ]
    }
   ],
   "source": [
    "def f_xgb(params):\n",
    "    \"\"\"Hyperopt objective: 5-fold CV MAE of an XGBoost regressor built from `params`.\n",
    "\n",
    "    Reads the notebook globals X_train, bc_y_train and maxlog. Each fold's model is fit on\n",
    "    bc_y_train; the MAE is computed after passing targets and predictions through bcback.\n",
    "\n",
    "    Returns the dict hyperopt expects:\n",
    "    {'loss': mean MAE over the folds rounded to 2 decimals, 'status': STATUS_OK}.\n",
    "    \"\"\"\n",
    "    # shuffle=True so that random_state takes effect; recent scikit-learn raises if\n",
    "    # random_state is set while shuffle is False.\n",
    "    kf = KFold(n_splits=5, shuffle=True, random_state=42)\n",
    "    MAE=[]\n",
    "    train_x=X_train\n",
    "    for train, test in kf.split(train_x):\n",
    "        kf_X_tr = train_x.iloc[train]   # training rows of this fold\n",
    "        kf_y_tr = bc_y_train[train]     # training targets of this fold\n",
    "        kf_X_val = train_x.iloc[test]   # validation rows of this fold\n",
    "        kf_y_val = bc_y_train[test]     # validation targets of this fold\n",
    "        # A fresh XGBoost model per fold, built from the sampled params. An LGBMRegressor with\n",
    "        # fixed settings used to be fit here instead, so `params` never influenced the loss.\n",
    "        clf = xgb.XGBRegressor(**params)\n",
    "        clf.fit(kf_X_tr,kf_y_tr,verbose=False)\n",
    "        result=clf.predict(kf_X_val)\n",
    "        MAE.append(mean_absolute_error(bcback(kf_y_val,maxlog),bcback(result,maxlog)))\n",
    "    return {'loss': round(np.mean(MAE),2), 'status': STATUS_OK}\n",
    "\n",
    "# Search space passed to f_xgb. Other hyperopt distributions that could be used here:\n",
    "# hp.normal, hp.lognormal, hp.quniform\n",
    "space_xgb = {\n",
    "        \"objective\":'reg:squarederror',\n",
    "        \"eval_metric\":\"mae\",\n",
    "        'max_depth': hp.choice('max_depth', range(2,15)),\n",
    "        \"learning_rate\":hp.uniform(\"learning_rate\",0.01,0.1),\n",
    "        \"reg_alpha\":hp.uniform(\"reg_alpha\",0,0.05),\n",
    "        \"reg_lambda\":hp.uniform(\"reg_lambda\",0,0.05)}\n",
    "trials=Trials()\n",
    "# max_evals=2 means only two parameter sets are evaluated.\n",
    "# NOTE(review): for hp.choice parameters fmin returns the index of the chosen option, so\n",
    "# best_xgb['max_depth'] is an index into range(2,15) rather than a depth - confirm before reuse.\n",
    "best_xgb = fmin(f_xgb, space_xgb, algo=tpe.suggest, max_evals=2, trials=trials)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'learning_rate': 0.06865186354555616,\n",
       " 'max_depth': 7,\n",
       " 'reg_alpha': 0.00780928407267999,\n",
       " 'reg_lambda': 0.021773428713610057}"
      ]
     },
     "execution_count": 170,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_xgb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
       "             colsample_bynode=1, colsample_bytree=1, gamma=0,\n",
       "             importance_type='gain', learning_rate=0.06865186354555616,\n",
       "             max_delta_step=0, max_depth=7, min_child_weight=1, missing=None,\n",
       "             n_estimators=100, n_jobs=1, nthread=None,\n",
       "             objective='reg:squarederror', random_state=0,\n",
       "             reg_alpha=0.00780928407267999, reg_lambda=0.021773428713610057,\n",
       "             scale_pos_weight=1, seed=None, silent=None, subsample=1,\n",
       "             verbosity=1)"
      ]
     },
     "execution_count": 185,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fmin() reports an hp.choice parameter as the index of the selected option, not the option\n",
    "# itself, so map max_depth back onto the range(2, 15) used in space_xgb before building the\n",
    "# model (keep this range in sync with the search space).\n",
    "xgb_params = dict(best_xgb, max_depth=range(2, 15)[int(best_xgb[\"max_depth\"])])\n",
    "clf = xgb.XGBRegressor(**xgb_params,objective='reg:squarederror')\n",
    "clf.fit(X_train, bc_y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {},
   "outputs": [],
   "source": [
    "predict=clf.predict(test_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([], shape=(50000, 0), dtype=float32)"
      ]
     },
     "execution_count": 188,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Submission frame for the XGBoost model: one row per test SaleID.\n",
    "# bcback(..., maxlog) presumably maps predictions back from the Box-Cox scale - confirm against its definition.\n",
    "result_xgb = pd.DataFrame({\"SaleID\": range(150000, 200000)})\n",
    "result_xgb = result_xgb.assign(price=bcback(predict, maxlog))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_xgb[\"price\"].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_xgb.to_csv(\"result_xgb.csv\",index=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pasle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. Model Ensemble"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.1 Bagging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Blend weights, applied in order to the lgb, cab and cnn predictions loaded below.\n",
    "weight = [0.3, 0.3, 0.4]\n",
    "# Only the price column of each saved result file is needed for the blend.\n",
    "y_pred_lgb = pd.read_csv(\"result_lgb.csv\")[\"price\"]\n",
    "y_pred_cab = pd.read_csv(\"result_cab.csv\")[\"price\"]\n",
    "y_pred_cnn = pd.read_csv(\"results_cnn.csv\")[\"price\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred_weight=weight[0]*pd.Series(y_pred_lgb)+weight[1]*pd.Series(y_pred_cab)+weight[2]*pd.Series(y_pred_cnn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0         571.375393\n",
       "1         814.235221\n",
       "2        4047.983903\n",
       "3        1183.835313\n",
       "4         939.369828\n",
       "            ...     \n",
       "49995    2856.212595\n",
       "49996    8684.901791\n",
       "49997    2444.352394\n",
       "49998    2077.224272\n",
       "49999    2290.910907\n",
       "Name: price, Length: 50000, dtype: float64"
      ]
     },
     "execution_count": 195,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred_weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Submission frame for the weighted blend: one row per test SaleID.\n",
    "results = pd.DataFrame({\"SaleID\": range(150000, 200000)})\n",
    "results = results.assign(price=y_pred_weight)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    50000.000000\n",
       "mean      3650.684632\n",
       "std       4454.304926\n",
       "min         40.335827\n",
       "25%        828.363632\n",
       "50%       2014.978772\n",
       "75%       4705.280192\n",
       "max      54905.175980\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 198,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[\"price\"].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "metadata": {},
   "outputs": [],
   "source": [
    "results.to_csv(\"result_bagging.csv\",index=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "mlgb,mxgb,mcab的权重分别设为0.3，0.5，0.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# k-fold comparison of the weighted ensemble against its base models (k=5 by default)\n",
    "def kvalid(X,k=5):\n",
    "    \"\"\"Score the weighted ensemble and the three base models on k validation folds.\n",
    "\n",
    "    X is the feature frame to split. Targets are read from the notebook global y_train, and\n",
    "    mlgb, mxgb, mcab and weight_ensemble must already be defined. Nothing is refit here:\n",
    "    the existing models are only scored on each fold's validation rows.\n",
    "\n",
    "    Returns a dict with the mean MAE (rounded to 2 decimals) of the ensemble and of each model.\n",
    "    \"\"\"\n",
    "    # shuffle=True so that random_state takes effect; recent scikit-learn raises if\n",
    "    # random_state is set while shuffle is False.\n",
    "    kf = KFold(n_splits=k, shuffle=True, random_state=42)\n",
    "    # Split the frame that was passed in (the global X_train used to be split instead) and\n",
    "    # reset the indices once, outside the loop. pd.Series() lets y_train be an array or a Series.\n",
    "    features = X.reset_index(drop=True)\n",
    "    targets = pd.Series(y_train).reset_index(drop=True)\n",
    "    MAE=[]\n",
    "    MAE_lgb=[]\n",
    "    MAE_xgb=[]\n",
    "    MAE_cab=[]\n",
    "    for _, test in kf.split(features):\n",
    "        kf_X_val = features.iloc[test]  # validation rows of this fold\n",
    "        kf_y_val = targets.iloc[test]   # validation targets of this fold\n",
    "        MAE.append(mean_absolute_error(kf_y_val,weight_ensemble(kf_X_val,[0.3,0.5,0.2])))\n",
    "        MAE_lgb.append(mean_absolute_error(kf_y_val,mlgb.predict(kf_X_val)))\n",
    "        MAE_xgb.append(mean_absolute_error(kf_y_val,mxgb.predict(kf_X_val)))\n",
    "        MAE_cab.append(mean_absolute_error(kf_y_val,mcab.predict(kf_X_val)))\n",
    "    return {\"MAE_ensemble\":round(np.mean(MAE),2),\"MAE_lgb\":round(np.mean(MAE_lgb),2),\n",
    "            \"MAE_xgb\":round(np.mean(MAE_xgb),2),\"MAE_cab\":round(np.mean(MAE_cab),2)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "kvalid(X_train,k=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred_lgb_bc=box_cox_back(weight_ensemble(X_test,[0.3,0.5,0.2]),maxlog)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.distplot(y_pred_lgb_bc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.Series(y_pred_lgb_bc).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "kvalid_bc(X_train,k=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.2 Stacking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [],
   "source": [
    "from mlxtend.regressor import StackingRegressor\n",
    "from mlxtend.data import boston_housing_data\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.linear_model import Ridge\n",
    "from sklearn.linear_model import Lasso\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.ensemble import RandomForestRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialise the candidate meta-models\n",
    "lr = LinearRegression()\n",
    "ridge = Ridge(random_state=1)\n",
    "# Bound to a lowercase name: assigning the instance to `Lasso` would shadow the imported class\n",
    "# and make this cell fail when it is run a second time.\n",
    "lasso = Lasso(random_state=1)\n",
    "svr_lin = SVR(kernel='linear')\n",
    "svr_rbf = SVR(kernel='rbf')\n",
    "rf=RandomForestRegressor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<catboost.core.CatBoostRegressor at 0x25f8e5e1a48>"
      ]
     },
     "execution_count": 191,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Base models for the ensemble, both fit on the full X_train against bc_y_train.\n",
    "# best1, best_cab, cat_feat and cat_feat_index come from earlier cells of the notebook.\n",
    "# NOTE(review): mxgb is used by later cells but is not fitted in this cell - confirm where it is defined.\n",
    "mlgb = lgb.LGBMRegressor(**best1)\n",
    "mlgb.fit(X_train, bc_y_train,categorical_feature=cat_feat,verbose=0) \n",
    "mcab = cab.CatBoostRegressor(**best_cab )\n",
    "mcab.fit(X_train, bc_y_train,cat_features=cat_feat_index,verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Meta-features: one column of base-model predictions per model, in the order lgb, xgb, cab.\n",
    "# The training matrix used to hold the CatBoost predictions twice, so its columns did not\n",
    "# line up with those of the test matrix.\n",
    "X_tr=pd.concat([\n",
    "    pd.Series(mlgb.predict(X_train)),\n",
    "    pd.Series(mxgb.predict(X_train)),\n",
    "    pd.Series(mcab.predict(X_train))],\n",
    "    axis=1).values\n",
    "X_ts=pd.concat([\n",
    "    pd.Series(mlgb.predict(X_test)),\n",
    "    pd.Series(mxgb.predict(X_test)),\n",
    "    pd.Series(mcab.predict(X_test))],axis=1).values\n",
    "# Ridge is the meta-learner, fitted on the stacked base-model predictions.\n",
    "ridge.fit(X_tr,y_train)\n",
    "Stacking_result = ridge.predict(X_ts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Stacking(X_train,bc_y_train,X_test):\n",
    "    \"\"\"Unfinished stub: returns the notebook-level Stacking_result.\n",
    "\n",
    "    None of the three arguments is used. Stacking_result is not assigned inside the\n",
    "    function, so the value returned is whatever that global currently holds.\n",
    "    NOTE(review): presumably meant to fit the stacked model on (X_train, bc_y_train) and\n",
    "    predict X_test - confirm the intended behaviour before relying on kvalid_stacking.\n",
    "    \"\"\"\n",
    "    # Created but never used below.\n",
    "    svr_rbf = SVR(kernel='rbf')\n",
    "\n",
    "    \n",
    "\n",
    "    return Stacking_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# k-fold validation of the stacking pipeline (k=5 by default)\n",
    "def kvalid_stacking(X_train,y_train,X_test,k=5):\n",
    "    \"\"\"Mean MAE of Stacking() over k folds of (X_train, y_train).\n",
    "\n",
    "    For each fold, Stacking is called with the fold's training rows, training targets and\n",
    "    validation rows, and its return value is scored against the validation targets.\n",
    "    X_test is accepted for signature compatibility but is not used.\n",
    "\n",
    "    Returns {\"MAE_stacking\": mean MAE rounded to 2 decimals}.\n",
    "    \"\"\"\n",
    "    # shuffle=True so that random_state takes effect; recent scikit-learn raises if\n",
    "    # random_state is set while shuffle is False.\n",
    "    kf = KFold(n_splits=k, shuffle=True, random_state=42)\n",
    "    # Reset the indices once instead of four times per fold; pd.Series() lets y_train be\n",
    "    # either a NumPy array or a Series.\n",
    "    features = X_train.reset_index(drop=True)\n",
    "    targets = pd.Series(y_train).reset_index(drop=True)\n",
    "    MAE=[]\n",
    "    for train, test in kf.split(features):\n",
    "        fold_pred = Stacking(features.iloc[train], targets.iloc[train], features.iloc[test])\n",
    "        MAE.append(mean_absolute_error(targets.iloc[test], fold_pred))\n",
    "    return {\"MAE_stacking\":round(np.mean(MAE),2)}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
